62 files changed, 48733 insertions, 0 deletions
diff --git a/prism/api_pack.c b/prism/api_pack.c
new file mode 100644
index 0000000000..98509ae65c
--- /dev/null
+++ b/prism/api_pack.c
@@ -0,0 +1,276 @@
+#include "prism/extension.h"
+
+#ifdef PRISM_EXCLUDE_PACK
+
+void
+Init_prism_pack(void) {} // No-op stub compiled when PRISM_EXCLUDE_PACK is defined; it registers nothing.
+
+#else
+
+static VALUE rb_cPrism; // Prism module; assigned in Init_prism_pack.
+static VALUE rb_cPrismPack; // Prism::Pack module; assigned in Init_prism_pack.
+static VALUE rb_cPrismPackDirective; // Prism::Pack::Directive class; pack_parse instantiates it once per directive.
+static VALUE rb_cPrismPackFormat; // Prism::Pack::Format class; pack_parse returns an instance of it.
+
+static VALUE v3_2_0_symbol; // :v3_2_0, the only version pack_parse accepts.
+static VALUE pack_symbol; // :pack, selects PM_PACK_VARIANT_PACK in pack_parse.
+static VALUE unpack_symbol; // :unpack, selects PM_PACK_VARIANT_UNPACK in pack_parse.
+
+#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
+# define UINT64T2NUM(x) ULL2NUM(x)
+# define NUM2UINT64T(x) (uint64_t)NUM2ULL(x)
+#elif SIZEOF_UINT64_T == SIZEOF_LONG
+# define UINT64T2NUM(x) ULONG2NUM(x)
+# define NUM2UINT64T(x) (uint64_t)NUM2ULONG(x)
+#else
+// error No uint64_t conversion
+#endif
+
+static VALUE
+pack_type_to_symbol(pm_pack_type type) { // Returns the Ruby symbol that names the given directive type.
+    switch (type) {
+        case PM_PACK_SPACE:
+            return ID2SYM(rb_intern("SPACE"));
+        case PM_PACK_COMMENT:
+            return ID2SYM(rb_intern("COMMENT"));
+        case PM_PACK_INTEGER:
+            return ID2SYM(rb_intern("INTEGER"));
+        case PM_PACK_UTF8:
+            return ID2SYM(rb_intern("UTF8"));
+        case PM_PACK_BER:
+            return ID2SYM(rb_intern("BER"));
+        case PM_PACK_FLOAT:
+            return ID2SYM(rb_intern("FLOAT"));
+        case PM_PACK_STRING_SPACE_PADDED:
+            return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
+        case PM_PACK_STRING_NULL_PADDED:
+            return ID2SYM(rb_intern("STRING_NULL_PADDED"));
+        case PM_PACK_STRING_NULL_TERMINATED:
+            return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
+        case PM_PACK_STRING_MSB:
+            return ID2SYM(rb_intern("STRING_MSB"));
+        case PM_PACK_STRING_LSB:
+            return ID2SYM(rb_intern("STRING_LSB"));
+        case PM_PACK_STRING_HEX_HIGH:
+            return ID2SYM(rb_intern("STRING_HEX_HIGH"));
+        case PM_PACK_STRING_HEX_LOW:
+            return ID2SYM(rb_intern("STRING_HEX_LOW"));
+        case PM_PACK_STRING_UU:
+            return ID2SYM(rb_intern("STRING_UU"));
+        case PM_PACK_STRING_MIME:
+            return ID2SYM(rb_intern("STRING_MIME"));
+        case PM_PACK_STRING_BASE64:
+            return ID2SYM(rb_intern("STRING_BASE64"));
+        case PM_PACK_STRING_FIXED:
+            return ID2SYM(rb_intern("STRING_FIXED"));
+        case PM_PACK_STRING_POINTER:
+            return ID2SYM(rb_intern("STRING_POINTER"));
+        case PM_PACK_MOVE:
+            return ID2SYM(rb_intern("MOVE"));
+        case PM_PACK_BACK:
+            return ID2SYM(rb_intern("BACK"));
+        case PM_PACK_NULL:
+            return ID2SYM(rb_intern("NULL"));
+        default:
+            return Qnil; // any value without a case above maps to nil
+    }
+}
+
+static VALUE
+pack_signed_to_symbol(pm_pack_signed signed_type) { // Returns the Ruby symbol that names the given signedness.
+    switch (signed_type) {
+        case PM_PACK_UNSIGNED:
+            return ID2SYM(rb_intern("UNSIGNED"));
+        case PM_PACK_SIGNED:
+            return ID2SYM(rb_intern("SIGNED"));
+        case PM_PACK_SIGNED_NA:
+            return ID2SYM(rb_intern("SIGNED_NA"));
+        default:
+            return Qnil; // any value without a case above maps to nil
+    }
+}
+
+static VALUE
+pack_endian_to_symbol(pm_pack_endian endian) { // Returns the Ruby symbol that names the given endianness.
+    switch (endian) {
+        case PM_PACK_AGNOSTIC_ENDIAN:
+            return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
+        case PM_PACK_LITTLE_ENDIAN:
+            return ID2SYM(rb_intern("LITTLE_ENDIAN"));
+        case PM_PACK_BIG_ENDIAN:
+            return ID2SYM(rb_intern("BIG_ENDIAN"));
+        case PM_PACK_NATIVE_ENDIAN:
+            return ID2SYM(rb_intern("NATIVE_ENDIAN"));
+        case PM_PACK_ENDIAN_NA:
+            return ID2SYM(rb_intern("ENDIAN_NA"));
+        default:
+            return Qnil; // any value without a case above maps to nil
+    }
+}
+
+static VALUE
+pack_size_to_symbol(pm_pack_size size) { // Returns the Ruby symbol that names the given size class.
+    switch (size) {
+        case PM_PACK_SIZE_SHORT:
+            return ID2SYM(rb_intern("SIZE_SHORT"));
+        case PM_PACK_SIZE_INT:
+            return ID2SYM(rb_intern("SIZE_INT"));
+        case PM_PACK_SIZE_LONG:
+            return ID2SYM(rb_intern("SIZE_LONG"));
+        case PM_PACK_SIZE_LONG_LONG:
+            return ID2SYM(rb_intern("SIZE_LONG_LONG"));
+        case PM_PACK_SIZE_8:
+            return ID2SYM(rb_intern("SIZE_8"));
+        case PM_PACK_SIZE_16:
+            return ID2SYM(rb_intern("SIZE_16"));
+        case PM_PACK_SIZE_32:
+            return ID2SYM(rb_intern("SIZE_32"));
+        case PM_PACK_SIZE_64:
+            return ID2SYM(rb_intern("SIZE_64"));
+        case PM_PACK_SIZE_P:
+            return ID2SYM(rb_intern("SIZE_P"));
+        case PM_PACK_SIZE_NA:
+            return ID2SYM(rb_intern("SIZE_NA"));
+        default:
+            return Qnil; // any value without a case above maps to nil
+    }
+}
+
+static VALUE
+pack_length_type_to_symbol(pm_pack_length_type length_type) { // Returns the Ruby symbol that names the given length type.
+    switch (length_type) {
+        case PM_PACK_LENGTH_FIXED:
+            return ID2SYM(rb_intern("LENGTH_FIXED"));
+        case PM_PACK_LENGTH_MAX:
+            return ID2SYM(rb_intern("LENGTH_MAX"));
+        case PM_PACK_LENGTH_RELATIVE:
+            return ID2SYM(rb_intern("LENGTH_RELATIVE"));
+        case PM_PACK_LENGTH_NA:
+            return ID2SYM(rb_intern("LENGTH_NA"));
+        default:
+            return Qnil; // any value without a case above maps to nil
+    }
+}
+
+static VALUE
+pack_encoding_to_ruby(pm_pack_encoding encoding) { // Returns the Ruby Encoding object for a pack encoding, or nil.
+    int index; // Ruby encoding index; only read after one of the three cases below has set it
+    switch (encoding) {
+        case PM_PACK_ENCODING_ASCII_8BIT:
+            index = rb_ascii8bit_encindex();
+            break;
+        case PM_PACK_ENCODING_US_ASCII:
+            index = rb_usascii_encindex();
+            break;
+        case PM_PACK_ENCODING_UTF_8:
+            index = rb_utf8_encindex();
+            break;
+        default:
+            return Qnil; // every other value, including the PM_PACK_ENCODING_START that pack_parse begins with, maps to nil
+    }
+    return rb_enc_from_encoding(rb_enc_from_index(index));
+}
+
+/**
+ * call-seq:
+ *   Pack::parse(version, variant, source) -> Format
+ *
+ * Parse the given source and return a format object. Raises ArgumentError or RangeError when an argument or a directive is rejected.
+ */
+static VALUE
+pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
+    if (version_symbol != v3_2_0_symbol) { // only :v3_2_0 is accepted
+        rb_raise(rb_eArgError, "invalid version");
+    }
+
+    pm_pack_variant variant; // assigned on both non-raising branches below
+    if (variant_symbol == pack_symbol) {
+        variant = PM_PACK_VARIANT_PACK;
+    } else if (variant_symbol == unpack_symbol) {
+        variant = PM_PACK_VARIANT_UNPACK;
+    } else {
+        rb_raise(rb_eArgError, "invalid variant");
+    }
+
+    StringValue(format_string); // may replace format_string with an implicitly converted String
+
+    const char *format = RSTRING_PTR(format_string); // cursor into the String's buffer; pm_pack_parse receives its address
+    const char *format_end = format + RSTRING_LEN(format_string);
+    pm_pack_encoding encoding = PM_PACK_ENCODING_START; // passed by address to every pm_pack_parse call
+
+    VALUE directives_array = rb_ary_new();
+
+    while (format < format_end) {
+        pm_pack_type type;
+        pm_pack_signed signed_type;
+        pm_pack_endian endian;
+        pm_pack_size size;
+        pm_pack_length_type length_type;
+        uint64_t length;
+
+        const char *directive_start = format; // remembered so the directive's source text can be sliced out below
+
+        pm_pack_result parse_result = pm_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
+                                                    &size, &length_type, &length, &encoding);
+
+        const char *directive_end = format; // position after the call is taken as the end of this directive
+
+        switch (parse_result) { // every case except PM_PACK_OK raises, so none of them falls through
+            case PM_PACK_OK:
+                break;
+            case PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
+                rb_raise(rb_eArgError, "unsupported directive");
+            case PM_PACK_ERROR_UNKNOWN_DIRECTIVE:
+                rb_raise(rb_eArgError, "unsupported directive"); // NOTE(review): same text as the unsupported case; confirm this is intended
+            case PM_PACK_ERROR_LENGTH_TOO_BIG:
+                rb_raise(rb_eRangeError, "pack length too big");
+            case PM_PACK_ERROR_BANG_NOT_ALLOWED:
+                rb_raise(rb_eRangeError, "bang not allowed");
+            case PM_PACK_ERROR_DOUBLE_ENDIAN:
+                rb_raise(rb_eRangeError, "double endian");
+            default:
+                rb_bug("parse result");
+        }
+
+        if (type == PM_PACK_END) { // end marker: stop without recording a directive
+            break;
+        }
+
+        VALUE directive_args[9] = { // positional arguments for Prism::Pack::Directive.new
+            version_symbol,
+            variant_symbol,
+            rb_usascii_str_new(directive_start, directive_end - directive_start),
+            pack_type_to_symbol(type),
+            pack_signed_to_symbol(signed_type),
+            pack_endian_to_symbol(endian),
+            pack_size_to_symbol(size),
+            pack_length_type_to_symbol(length_type),
+            UINT64T2NUM(length)
+        };
+
+        rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cPrismPackDirective));
+    }
+
+    RB_GC_GUARD(format_string); // the loop allocated while format pointed into this String; keep it reachable until here
+
+    VALUE format_args[2] = { directives_array, pack_encoding_to_ruby(encoding) }; // arguments for Prism::Pack::Format.new
+    return rb_class_new_instance(2, format_args, rb_cPrismPackFormat);
+}
+
+/**
+ * The function that gets called when Ruby initializes the prism extension. Defines Prism::Pack with its Directive and Format classes and the Pack.parse singleton method.
+ */
+void
+Init_prism_pack(void) {
+    rb_cPrism = rb_define_module("Prism");
+    rb_cPrismPack = rb_define_module_under(rb_cPrism, "Pack");
+    rb_cPrismPackDirective = rb_define_class_under(rb_cPrismPack, "Directive", rb_cObject);
+    rb_cPrismPackFormat = rb_define_class_under(rb_cPrismPack, "Format", rb_cObject);
+    rb_define_singleton_method(rb_cPrismPack, "parse", pack_parse, 3); // three arguments: version, variant, source
+
+    v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0")); // cached because pack_parse compares its arguments against these by identity
+    pack_symbol = ID2SYM(rb_intern("pack"));
+    unpack_symbol = ID2SYM(rb_intern("unpack"));
+}
+
+#endif
diff --git a/prism/config.yml b/prism/config.yml
new file mode 100644
index 0000000000..4e5b077a35
--- /dev/null
+++ b/prism/config.yml
@@ -0,0 +1,4739 @@
+errors:
+  - ALIAS_ARGUMENT
+  - ALIAS_ARGUMENT_NUMBERED_REFERENCE
+  - AMPAMPEQ_MULTI_ASSIGN
+  - ARGUMENT_AFTER_BLOCK
+  - ARGUMENT_AFTER_FORWARDING_ELLIPSES
+  - ARGUMENT_BARE_HASH
+  - ARGUMENT_BLOCK_FORWARDING
+  - ARGUMENT_BLOCK_MULTI
+  - ARGUMENT_CONFLICT_AMPERSAND
+  - ARGUMENT_CONFLICT_STAR
+  - ARGUMENT_CONFLICT_STAR_STAR
+  - ARGUMENT_FORMAL_CLASS
+  - ARGUMENT_FORMAL_CONSTANT
+  - ARGUMENT_FORMAL_GLOBAL
+  - ARGUMENT_FORMAL_IVAR
+  - ARGUMENT_FORWARDING_UNBOUND
+  - ARGUMENT_NO_FORWARDING_AMPERSAND
+  - ARGUMENT_NO_FORWARDING_ELLIPSES
+  - ARGUMENT_NO_FORWARDING_STAR
+  - ARGUMENT_NO_FORWARDING_STAR_STAR
+  - ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT
+  - ARGUMENT_SPLAT_AFTER_SPLAT
+  - ARGUMENT_TERM_PAREN
+  - ARGUMENT_UNEXPECTED_BLOCK
+  - ARRAY_ELEMENT
+  - ARRAY_EXPRESSION
+  - ARRAY_EXPRESSION_AFTER_STAR
+  - ARRAY_SEPARATOR
+  - ARRAY_TERM
+  - BEGIN_LONELY_ELSE
+  - BEGIN_TERM
+  - BEGIN_UPCASE_BRACE
+  - BEGIN_UPCASE_TERM
+  - BEGIN_UPCASE_TOPLEVEL
+  - BLOCK_PARAM_LOCAL_VARIABLE
+  - BLOCK_PARAM_PIPE_TERM
+  - BLOCK_TERM_BRACE
+  - BLOCK_TERM_END
+  - CANNOT_PARSE_EXPRESSION
+  - CANNOT_PARSE_STRING_PART
+  - CASE_EXPRESSION_AFTER_CASE
+  - CASE_EXPRESSION_AFTER_WHEN
+  - CASE_MATCH_MISSING_PREDICATE
+  - CASE_MISSING_CONDITIONS
+  - CASE_TERM
+  - CLASS_IN_METHOD
+  - CLASS_NAME
+  - CLASS_SUPERCLASS
+  - CLASS_TERM
+  - CLASS_UNEXPECTED_END
+  - CLASS_VARIABLE_BARE
+  - CONDITIONAL_ELSIF_PREDICATE
+  - CONDITIONAL_IF_PREDICATE
+  - CONDITIONAL_PREDICATE_TERM
+  - CONDITIONAL_TERM
+  - CONDITIONAL_TERM_ELSE
+  - CONDITIONAL_UNLESS_PREDICATE
+  - CONDITIONAL_UNTIL_PREDICATE
+  - CONDITIONAL_WHILE_PREDICATE
+  - CONSTANT_PATH_COLON_COLON_CONSTANT
+  - DEF_ENDLESS
+  - DEF_ENDLESS_PARAMETERS
+  - DEF_ENDLESS_SETTER
+  - DEF_NAME
+  - DEF_PARAMS_TERM
+  - DEF_PARAMS_TERM_PAREN
+  - DEF_RECEIVER
+  - DEF_RECEIVER_TERM
+  - DEF_TERM
+  - DEFINED_EXPRESSION
+  - EMBDOC_TERM
+  - EMBEXPR_END
+  - EMBVAR_INVALID
+  - END_UPCASE_BRACE
+  - END_UPCASE_TERM
+  - ESCAPE_INVALID_CONTROL
+  - ESCAPE_INVALID_CONTROL_REPEAT
+  - ESCAPE_INVALID_HEXADECIMAL
+  - ESCAPE_INVALID_META
+  - ESCAPE_INVALID_META_REPEAT
+  - ESCAPE_INVALID_UNICODE
+  - ESCAPE_INVALID_UNICODE_CM_FLAGS
+  - ESCAPE_INVALID_UNICODE_LIST
+  - ESCAPE_INVALID_UNICODE_LITERAL
+  - ESCAPE_INVALID_UNICODE_LONG
+  - ESCAPE_INVALID_UNICODE_SHORT
+  - ESCAPE_INVALID_UNICODE_TERM
+  - EXPECT_ARGUMENT
+  - EXPECT_EOL_AFTER_STATEMENT
+  - EXPECT_EXPRESSION_AFTER_AMPAMPEQ
+  - EXPECT_EXPRESSION_AFTER_COMMA
+  - EXPECT_EXPRESSION_AFTER_EQUAL
+  - EXPECT_EXPRESSION_AFTER_LESS_LESS
+  - EXPECT_EXPRESSION_AFTER_LPAREN
+  - EXPECT_EXPRESSION_AFTER_OPERATOR
+  - EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ
+  - EXPECT_EXPRESSION_AFTER_QUESTION
+  - EXPECT_EXPRESSION_AFTER_SPLAT
+  - EXPECT_EXPRESSION_AFTER_SPLAT_HASH
+  - EXPECT_EXPRESSION_AFTER_STAR
+  - EXPECT_FOR_DELIMITER
+  - EXPECT_IDENT_REQ_PARAMETER
+  - EXPECT_IN_DELIMITER
+  - EXPECT_LPAREN_AFTER_NOT_LPAREN
+  - EXPECT_LPAREN_AFTER_NOT_OTHER
+  - EXPECT_LPAREN_REQ_PARAMETER
+  - EXPECT_MESSAGE
+  - EXPECT_RBRACKET
+  - EXPECT_RPAREN
+  - EXPECT_RPAREN_AFTER_MULTI
+  - EXPECT_RPAREN_REQ_PARAMETER
+  - EXPECT_SINGLETON_CLASS_DELIMITER
+  - EXPECT_STRING_CONTENT
+  - EXPECT_WHEN_DELIMITER
+  - EXPRESSION_BARE_HASH
+  - EXPRESSION_NOT_WRITABLE
+  - EXPRESSION_NOT_WRITABLE_ENCODING
+  - EXPRESSION_NOT_WRITABLE_FALSE
+  - EXPRESSION_NOT_WRITABLE_FILE
+  - EXPRESSION_NOT_WRITABLE_LINE
+  - EXPRESSION_NOT_WRITABLE_NIL
+  - EXPRESSION_NOT_WRITABLE_NUMBERED
+  - EXPRESSION_NOT_WRITABLE_SELF
+  - EXPRESSION_NOT_WRITABLE_TRUE
+  - FLOAT_PARSE
+  - FOR_COLLECTION
+  - FOR_IN
+  - FOR_INDEX
+  - FOR_TERM
+  - GLOBAL_VARIABLE_BARE
+  - HASH_EXPRESSION_AFTER_LABEL
+  - HASH_KEY
+  - HASH_ROCKET
+  - HASH_TERM
+  - HASH_VALUE
+  - HEREDOC_IDENTIFIER
+  - HEREDOC_TERM
+  - INCOMPLETE_QUESTION_MARK
+  - INCOMPLETE_VARIABLE_CLASS
+  - INCOMPLETE_VARIABLE_CLASS_3_3
+  - INCOMPLETE_VARIABLE_INSTANCE
+  - INCOMPLETE_VARIABLE_INSTANCE_3_3
+  - INSTANCE_VARIABLE_BARE
+  - INVALID_BLOCK_EXIT
+  - INVALID_CHARACTER
+  - INVALID_COMMA
+  - INVALID_ENCODING_MAGIC_COMMENT
+  - INVALID_ESCAPE_CHARACTER
+  - INVALID_FLOAT_EXPONENT
+  - INVALID_LOCAL_VARIABLE_READ
+  - INVALID_LOCAL_VARIABLE_WRITE
+  - INVALID_MULTIBYTE_CHAR
+  - INVALID_MULTIBYTE_CHARACTER
+  - INVALID_MULTIBYTE_ESCAPE
+  - INVALID_NUMBER_BINARY
+  - INVALID_NUMBER_DECIMAL
+  - INVALID_NUMBER_FRACTION
+  - INVALID_NUMBER_HEXADECIMAL
+  - INVALID_NUMBER_OCTAL
+  - INVALID_NUMBER_UNDERSCORE_INNER
+  - INVALID_NUMBER_UNDERSCORE_TRAILING
+  - INVALID_PERCENT
+  - INVALID_PERCENT_EOF
+  - INVALID_PRINTABLE_CHARACTER
+  - INVALID_RETRY_AFTER_ELSE
+  - INVALID_RETRY_AFTER_ENSURE
+  - INVALID_RETRY_WITHOUT_RESCUE
+  - INVALID_SYMBOL
+  - INVALID_VARIABLE_GLOBAL
+  - INVALID_VARIABLE_GLOBAL_3_3
+  - INVALID_YIELD
+  - IT_NOT_ALLOWED_NUMBERED
+  - IT_NOT_ALLOWED_ORDINARY
+  - LAMBDA_OPEN
+  - LAMBDA_TERM_BRACE
+  - LAMBDA_TERM_END
+  - LIST_I_LOWER_ELEMENT
+  - LIST_I_LOWER_TERM
+  - LIST_I_UPPER_ELEMENT
+  - LIST_I_UPPER_TERM
+  - LIST_W_LOWER_ELEMENT
+  - LIST_W_LOWER_TERM
+  - LIST_W_UPPER_ELEMENT
+  - LIST_W_UPPER_TERM
+  - MALLOC_FAILED
+  - MIXED_ENCODING
+  - MODULE_IN_METHOD
+  - MODULE_NAME
+  - MODULE_TERM
+  - MULTI_ASSIGN_MULTI_SPLATS
+  - MULTI_ASSIGN_UNEXPECTED_REST
+  - NESTING_TOO_DEEP
+  - NO_LOCAL_VARIABLE
+  - NON_ASSOCIATIVE_OPERATOR
+  - NOT_EXPRESSION
+  - NUMBER_LITERAL_UNDERSCORE
+  - NUMBERED_PARAMETER_INNER_BLOCK
+  - NUMBERED_PARAMETER_IT
+  - NUMBERED_PARAMETER_ORDINARY
+  - NUMBERED_PARAMETER_OUTER_BLOCK
+  - OPERATOR_MULTI_ASSIGN
+  - OPERATOR_WRITE_ARGUMENTS
+  - OPERATOR_WRITE_BLOCK
+  - PARAMETER_ASSOC_SPLAT_MULTI
+  - PARAMETER_BLOCK_MULTI
+  - PARAMETER_CIRCULAR
+  - PARAMETER_FORWARDING_AFTER_REST
+  - PARAMETER_METHOD_NAME
+  - PARAMETER_NAME_DUPLICATED
+  - PARAMETER_NO_DEFAULT
+  - PARAMETER_NO_DEFAULT_KW
+  - PARAMETER_NUMBERED_RESERVED
+  - PARAMETER_ORDER
+  - PARAMETER_SPLAT_MULTI
+  - PARAMETER_STAR
+  - PARAMETER_UNEXPECTED_FWD
+  - PARAMETER_UNEXPECTED_NO_KW
+  - PARAMETER_WILD_LOOSE_COMMA
+  - PATTERN_ARRAY_MULTIPLE_RESTS
+  - PATTERN_CAPTURE_DUPLICATE
+  - PATTERN_CAPTURE_IN_ALTERNATIVE
+  - PATTERN_EXPRESSION_AFTER_BRACKET
+  - PATTERN_EXPRESSION_AFTER_COMMA
+  - PATTERN_EXPRESSION_AFTER_HROCKET
+  - PATTERN_EXPRESSION_AFTER_IN
+  - PATTERN_EXPRESSION_AFTER_KEY
+  - PATTERN_EXPRESSION_AFTER_PAREN
+  - PATTERN_EXPRESSION_AFTER_PIN
+  - PATTERN_EXPRESSION_AFTER_PIPE
+  - PATTERN_EXPRESSION_AFTER_RANGE
+  - PATTERN_EXPRESSION_AFTER_REST
+  - PATTERN_FIND_MISSING_INNER
+  - PATTERN_HASH_IMPLICIT
+  - PATTERN_HASH_KEY
+  - PATTERN_HASH_KEY_DUPLICATE
+  - PATTERN_HASH_KEY_INTERPOLATED
+  - PATTERN_HASH_KEY_LABEL
+  - PATTERN_HASH_KEY_LOCALS
+  - PATTERN_IDENT_AFTER_HROCKET
+  - PATTERN_LABEL_AFTER_COMMA
+  - PATTERN_REST
+  - PATTERN_TERM_BRACE
+  - PATTERN_TERM_BRACKET
+  - PATTERN_TERM_PAREN
+  - PIPEPIPEEQ_MULTI_ASSIGN
+  - REGEXP_ENCODING_OPTION_MISMATCH
+  - REGEXP_INCOMPAT_CHAR_ENCODING
+  - REGEXP_INVALID_UNICODE_RANGE
+  - REGEXP_NON_ESCAPED_MBC
+  - REGEXP_PARSE_ERROR
+  - REGEXP_TERM
+  - REGEXP_UNKNOWN_OPTIONS
+  - REGEXP_UTF8_CHAR_NON_UTF8_REGEXP
+  - RESCUE_EXPRESSION
+  - RESCUE_MODIFIER_VALUE
+  - RESCUE_TERM
+  - RESCUE_VARIABLE
+  - RETURN_INVALID
+  - SCRIPT_NOT_FOUND
+  - SINGLETON_FOR_LITERALS
+  - STATEMENT_ALIAS
+  - STATEMENT_POSTEXE_END
+  - STATEMENT_PREEXE_BEGIN
+  - STATEMENT_UNDEF
+  - STRING_CONCATENATION
+  - STRING_INTERPOLATED_TERM
+  - STRING_LITERAL_EOF
+  - STRING_LITERAL_TERM
+  - SYMBOL_INVALID
+  - SYMBOL_TERM_DYNAMIC
+  - SYMBOL_TERM_INTERPOLATED
+  - TERNARY_COLON
+  - TERNARY_EXPRESSION_FALSE
+  - TERNARY_EXPRESSION_TRUE
+  - UNARY_DISALLOWED
+  - UNARY_RECEIVER
+  - UNDEF_ARGUMENT
+  - UNEXPECTED_BLOCK_ARGUMENT
+  - UNEXPECTED_INDEX_BLOCK
+  - UNEXPECTED_INDEX_KEYWORDS
+  - UNEXPECTED_LABEL
+  - UNEXPECTED_MULTI_WRITE
+  - UNEXPECTED_PARAMETER_DEFAULT_VALUE
+  - UNEXPECTED_RANGE_OPERATOR
+  - UNEXPECTED_SAFE_NAVIGATION
+  - UNEXPECTED_TOKEN_CLOSE_CONTEXT
+  - UNEXPECTED_TOKEN_IGNORE
+  - UNTIL_TERM
+  - VOID_EXPRESSION
+  - WHILE_TERM
+  - WRITE_TARGET_IN_METHOD
+  - WRITE_TARGET_READONLY
+  - WRITE_TARGET_UNEXPECTED
+  - XSTRING_TERM
+warnings:
+  - AMBIGUOUS_BINARY_OPERATOR
+  - AMBIGUOUS_FIRST_ARGUMENT_MINUS
+  - AMBIGUOUS_FIRST_ARGUMENT_PLUS
+  - AMBIGUOUS_PREFIX_AMPERSAND
+  - AMBIGUOUS_PREFIX_STAR
+  - AMBIGUOUS_PREFIX_STAR_STAR
+  - AMBIGUOUS_SLASH
+  - COMPARISON_AFTER_COMPARISON
+  - DOT_DOT_DOT_EOL
+  - EQUAL_IN_CONDITIONAL
+  - EQUAL_IN_CONDITIONAL_3_3
+  - END_IN_METHOD
+  - DUPLICATED_HASH_KEY
+  - DUPLICATED_WHEN_CLAUSE
+  - FLOAT_OUT_OF_RANGE
+  - IGNORED_FROZEN_STRING_LITERAL
+  - INDENTATION_MISMATCH
+  - INTEGER_IN_FLIP_FLOP
+  - INVALID_CHARACTER
+  - INVALID_MAGIC_COMMENT_VALUE
+  - INVALID_NUMBERED_REFERENCE
+  - KEYWORD_EOL
+  - LITERAL_IN_CONDITION_DEFAULT
+  - LITERAL_IN_CONDITION_VERBOSE
+  - SHAREABLE_CONSTANT_VALUE_LINE
+  - SHEBANG_CARRIAGE_RETURN
+  - UNEXPECTED_CARRIAGE_RETURN
+  - UNREACHABLE_STATEMENT
+  - UNUSED_LOCAL_VARIABLE
+  - VOID_STATEMENT
+tokens:
+  # The order of the tokens at the beginning is important, because we use them
+  # for a lookup table.
+  - name: EOF
+    value: 1
+    comment: final token in the file
+  - name: BRACE_RIGHT
+    comment: "}"
+  - name: COMMA
+    comment: ","
+  - name: EMBEXPR_END
+    comment: "}"
+  - name: KEYWORD_DO
+    comment: "do"
+  - name: KEYWORD_ELSE
+    comment: "else"
+  - name: KEYWORD_ELSIF
+    comment: "elsif"
+  - name: KEYWORD_END
+    comment: "end"
+  - name: KEYWORD_ENSURE
+    comment: "ensure"
+  - name: KEYWORD_IN
+    comment: "in"
+  - name: KEYWORD_RESCUE
+    comment: "rescue"
+  - name: KEYWORD_THEN
+    comment: "then"
+  - name: KEYWORD_WHEN
+    comment: "when"
+  - name: NEWLINE
+    comment: "a newline character outside of other tokens"
+  - name: PARENTHESIS_RIGHT
+    comment: ")"
+  - name: PIPE
+    comment: "|"
+  - name: SEMICOLON
+    comment: ";"
+  # Tokens from here on are not used for lookup, and can be in any order.
+  - name: AMPERSAND
+    comment: "&"
+  - name: AMPERSAND_AMPERSAND
+    comment: "&&"
+  - name: AMPERSAND_AMPERSAND_EQUAL
+    comment: "&&="
+  - name: AMPERSAND_DOT
+    comment: "&."
+  - name: AMPERSAND_EQUAL
+    comment: "&="
+  - name: BACKTICK
+    comment: "`"
+  - name: BACK_REFERENCE
+    comment: "a back reference"
+  - name: BANG
+    comment: "! or !@"
+  - name: BANG_EQUAL
+    comment: "!="
+  - name: BANG_TILDE
+    comment: "!~"
+  - name: BRACE_LEFT
+    comment: "{"
+  - name: BRACKET_LEFT
+    comment: "["
+  - name: BRACKET_LEFT_ARRAY
+    comment: "[ for the beginning of an array"
+  - name: BRACKET_LEFT_RIGHT
+    comment: "[]"
+  - name: BRACKET_LEFT_RIGHT_EQUAL
+    comment: "[]="
+  - name: BRACKET_RIGHT
+    comment: "]"
+  - name: CARET
+    comment: "^"
+  - name: CARET_EQUAL
+    comment: "^="
+  - name: CHARACTER_LITERAL
+    comment: "a character literal"
+  - name: CLASS_VARIABLE
+    comment: "a class variable"
+  - name: COLON
+    comment: ":"
+  - name: COLON_COLON
+    comment: "::"
+  - name: COMMENT
+    comment: "a comment"
+  - name: CONSTANT
+    comment: "a constant"
+  - name: DOT
+    comment: "the . call operator"
+  - name: DOT_DOT
+    comment: "the .. range operator"
+  - name: DOT_DOT_DOT
+    comment: "the ... range operator or forwarding parameter"
+  - name: EMBDOC_BEGIN
+    comment: "=begin"
+  - name: EMBDOC_END
+    comment: "=end"
+  - name: EMBDOC_LINE
+    comment: "a line inside of embedded documentation"
+  - name: EMBEXPR_BEGIN
+    comment: "#{"
+  - name: EMBVAR
+    comment: "#"
+  - name: EQUAL
+    comment: "="
+  - name: EQUAL_EQUAL
+    comment: "=="
+  - name: EQUAL_EQUAL_EQUAL
+    comment: "==="
+  - name: EQUAL_GREATER
+    comment: "=>"
+  - name: EQUAL_TILDE
+    comment: "=~"
+  - name: FLOAT
+    comment: "a floating point number"
+  - name: FLOAT_IMAGINARY
+    comment: "a floating point number with an imaginary suffix"
+  - name: FLOAT_RATIONAL
+    comment: "a floating point number with a rational suffix"
+  - name: FLOAT_RATIONAL_IMAGINARY
+    comment: "a floating point number with a rational and imaginary suffix"
+  - name: GLOBAL_VARIABLE
+    comment: "a global variable"
+  - name: GREATER
+    comment: ">"
+  - name: GREATER_EQUAL
+    comment: ">="
+  - name: GREATER_GREATER
+    comment: ">>"
+  - name: GREATER_GREATER_EQUAL
+    comment: ">>="
+  - name: HEREDOC_END
+    comment: "the end of a heredoc"
+  - name: HEREDOC_START
+    comment: "the start of a heredoc"
+  - name: IDENTIFIER
+    comment: "an identifier"
+  - name: IGNORED_NEWLINE
+    comment: "an ignored newline"
+  - name: INSTANCE_VARIABLE
+    comment: "an instance variable"
+  - name: INTEGER
+    comment: "an integer (any base)"
+  - name: INTEGER_IMAGINARY
+    comment: "an integer with an imaginary suffix"
+  - name: INTEGER_RATIONAL
+    comment: "an integer with a rational suffix"
+  - name: INTEGER_RATIONAL_IMAGINARY
+    comment: "an integer with a rational and imaginary suffix"
+  - name: KEYWORD_ALIAS
+    comment: "alias"
+  - name: KEYWORD_AND
+    comment: "and"
+  - name: KEYWORD_BEGIN
+    comment: "begin"
+  - name: KEYWORD_BEGIN_UPCASE
+    comment: "BEGIN"
+  - name: KEYWORD_BREAK
+    comment: "break"
+  - name: KEYWORD_CASE
+    comment: "case"
+  - name: KEYWORD_CLASS
+    comment: "class"
+  - name: KEYWORD_DEF
+    comment: "def"
+  - name: KEYWORD_DEFINED
+    comment: "defined?"
+  - name: KEYWORD_DO_LOOP
+    comment: "do keyword for a predicate in a while, until, or for loop"
+  - name: KEYWORD_END_UPCASE
+    comment: "END"
+  - name: KEYWORD_FALSE
+    comment: "false"
+  - name: KEYWORD_FOR
+    comment: "for"
+  - name: KEYWORD_IF
+    comment: "if"
+  - name: KEYWORD_IF_MODIFIER
+    comment: "if in the modifier form"
+  - name: KEYWORD_MODULE
+    comment: "module"
+  - name: KEYWORD_NEXT
+    comment: "next"
+  - name: KEYWORD_NIL
+    comment: "nil"
+  - name: KEYWORD_NOT
+    comment: "not"
+  - name: KEYWORD_OR
+    comment: "or"
+  - name: KEYWORD_REDO
+    comment: "redo"
+  - name: KEYWORD_RESCUE_MODIFIER
+    comment: "rescue in the modifier form"
+  - name: KEYWORD_RETRY
+    comment: "retry"
+  - name: KEYWORD_RETURN
+    comment: "return"
+  - name: KEYWORD_SELF
+    comment: "self"
+  - name: KEYWORD_SUPER
+    comment: "super"
+  - name: KEYWORD_TRUE
+    comment: "true"
+  - name: KEYWORD_UNDEF
+    comment: "undef"
+  - name: KEYWORD_UNLESS
+    comment: "unless"
+  - name: KEYWORD_UNLESS_MODIFIER
+    comment: "unless in the modifier form"
+  - name: KEYWORD_UNTIL
+    comment: "until"
+  - name: KEYWORD_UNTIL_MODIFIER
+    comment: "until in the modifier form"
+  - name: KEYWORD_WHILE
+    comment: "while"
+  - name: KEYWORD_WHILE_MODIFIER
+    comment: "while in the modifier form"
+  - name: KEYWORD_YIELD
+    comment: "yield"
+  - name: KEYWORD___ENCODING__
+    comment: "__ENCODING__"
+  - name: KEYWORD___FILE__
+    comment: "__FILE__"
+  - name: KEYWORD___LINE__
+    comment: "__LINE__"
+  - name: LABEL
+    comment: "a label"
+  - name: LABEL_END
+    comment: "the end of a label"
+  - name: LAMBDA_BEGIN
+    comment: "{"
+  - name: LESS
+    comment: "<"
+  - name: LESS_EQUAL
+    comment: "<="
+  - name: LESS_EQUAL_GREATER
+    comment: "<=>"
+  - name: LESS_LESS
+    comment: "<<"
+  - name: LESS_LESS_EQUAL
+    comment: "<<="
+  - name: METHOD_NAME
+    comment: "a method name"
+  - name: MINUS
+    comment: "-"
+  - name: MINUS_EQUAL
+    comment: "-="
+  - name: MINUS_GREATER
+    comment: "->"
+  - name: NUMBERED_REFERENCE
+    comment: "a numbered reference to a capture group in the previous regular expression match"
+  - name: PARENTHESIS_LEFT
+    comment: "("
+  - name: PARENTHESIS_LEFT_PARENTHESES
+    comment: "( for a parentheses node"
+  - name: PERCENT
+    comment: "%"
+  - name: PERCENT_EQUAL
+    comment: "%="
+  - name: PERCENT_LOWER_I
+    comment: "%i"
+  - name: PERCENT_LOWER_W
+    comment: "%w"
+  - name: PERCENT_LOWER_X
+    comment: "%x"
+  - name: PERCENT_UPPER_I
+    comment: "%I"
+  - name: PERCENT_UPPER_W
+    comment: "%W"
+  - name: PIPE_EQUAL
+    comment: "|="
+  - name: PIPE_PIPE
+    comment: "||"
+  - name: PIPE_PIPE_EQUAL
+    comment: "||="
+  - name: PLUS
+    comment: "+"
+  - name: PLUS_EQUAL
+    comment: "+="
+  - name: QUESTION_MARK
+    comment: "?"
+  - name: REGEXP_BEGIN
+    comment: "the beginning of a regular expression"
+  - name: REGEXP_END
+    comment: "the end of a regular expression"
+  - name: SLASH
+    comment: "/"
+  - name: SLASH_EQUAL
+    comment: "/="
+  - name: STAR
+    comment: "*"
+  - name: STAR_EQUAL
+    comment: "*="
+  - name: STAR_STAR
+    comment: "**"
+  - name: STAR_STAR_EQUAL
+    comment: "**="
+  - name: STRING_BEGIN
+    comment: "the beginning of a string"
+  - name: STRING_CONTENT
+    comment: "the contents of a string"
+  - name: STRING_END
+    comment: "the end of a string"
+  - name: SYMBOL_BEGIN
+    comment: "the beginning of a symbol"
+  - name: TILDE
+    comment: "~ or ~@"
+  - name: UAMPERSAND
+    comment: "unary &"
+  - name: UCOLON_COLON
+    comment: "unary ::"
+  - name: UDOT_DOT
+    comment: "unary .. operator"
+  - name: UDOT_DOT_DOT
+    comment: "unary ... operator"
+  - name: UMINUS
+    comment: "-@"
+  - name: UMINUS_NUM
+    comment: "-@ for a number"
+  - name: UPLUS
+    comment: "+@"
+  - name: USTAR
+    comment: "unary *"
+  - name: USTAR_STAR
+    comment: "unary **"
+  - name: WORDS_SEP
+    comment: "a separator between words in a list"
+  - name: __END__
+    comment: "marker for the point in the file at which the parser should stop"
+  - name: MISSING
+    comment: "a token that was expected but not found"
+  - name: NOT_PROVIDED
+    comment: "a token that was not present but it is okay"
+flags:
+  - name: ArgumentsNodeFlags
+    values:
+      - name: CONTAINS_FORWARDING
+        comment: "if the arguments contain forwarding"
+      - name: CONTAINS_KEYWORDS
+        comment: "if the arguments contain keywords"
+      - name: CONTAINS_KEYWORD_SPLAT
+        comment: "if the arguments contain a keyword splat"
+      - name: CONTAINS_SPLAT
+        comment: "if the arguments contain a splat"
+      - name: CONTAINS_MULTIPLE_SPLATS
+        comment: "if the arguments contain multiple splats"
+    comment: Flags for arguments nodes.
+  - name: ArrayNodeFlags
+    values:
+      - name: CONTAINS_SPLAT
+        comment: "if array contains splat nodes"
+    comment: Flags for array nodes.
+  - name: CallNodeFlags
+    values:
+      - name: SAFE_NAVIGATION
+        comment: "&. operator"
+      - name: VARIABLE_CALL
+        comment: "a call that could have been a local variable"
+      - name: ATTRIBUTE_WRITE
+        comment: "a call that is an attribute write, so the value being written should be returned"
+      - name: IGNORE_VISIBILITY
+        comment: "a call that ignores method visibility"
+    comment: Flags for call nodes.
+  - name: EncodingFlags
+    values:
+      - name: FORCED_UTF8_ENCODING
+        comment: "internal bytes forced the encoding to UTF-8"
+      - name: FORCED_BINARY_ENCODING
+        comment: "internal bytes forced the encoding to binary"
+    comment: Flags for nodes that have unescaped content.
+  - name: IntegerBaseFlags
+    values:
+      - name: BINARY
+        comment: "0b prefix"
+      - name: DECIMAL
+        comment: "0d or no prefix"
+      - name: OCTAL
+        comment: "0o or 0 prefix"
+      - name: HEXADECIMAL
+        comment: "0x prefix"
+    comment: Flags for integer nodes that correspond to the base of the integer.
+  - name: InterpolatedStringNodeFlags
+    values:
+      - name: FROZEN
+        comment: "frozen by virtue of a `frozen_string_literal: true` comment or `--enable-frozen-string-literal`; only for adjacent string literals like `'a' 'b'`"
+      - name: MUTABLE
+        comment: "mutable by virtue of a `frozen_string_literal: false` comment or `--disable-frozen-string-literal`; only for adjacent string literals like `'a' 'b'`"
+    comment: Flags for interpolated string nodes that indicate mutability if they are also marked as literals.
+  - name: KeywordHashNodeFlags
+    values:
+      - name: SYMBOL_KEYS
+        comment: "a keyword hash which only has `AssocNode` elements all with symbol keys, which means the elements can be treated as keyword arguments"
+    comment: Flags for keyword hash nodes.
+  - name: LoopFlags
+    values:
+      - name: BEGIN_MODIFIER
+        comment: "a loop after a begin statement, so the body is executed first before the condition"
+    comment: Flags for while and until loop nodes.
+  - name: ParameterFlags
+    values:
+      - name: REPEATED_PARAMETER
+        comment: "a parameter name that has been repeated in the method signature"
+    comment: Flags for parameter nodes.
+  - name: ParenthesesNodeFlags
+    values:
+      - name: MULTIPLE_STATEMENTS
+        comment: "parentheses that contain multiple potentially void statements"
+    comment: Flags for parentheses nodes.
+  - name: RangeFlags
+    values:
+      - name: EXCLUDE_END
+        comment: "... operator"
+    comment: Flags for range and flip-flop nodes.
+  - name: RegularExpressionFlags
+    values:
+      - name: IGNORE_CASE
+        comment: "i - ignores the case of characters when matching"
+      - name: EXTENDED
+        comment: "x - ignores whitespace and allows comments in regular expressions"
+      - name: MULTI_LINE
+        comment: "m - allows $ to match the end of lines within strings"
+      - name: ONCE
+        comment: "o - only interpolates values into the regular expression once"
+      - name: EUC_JP
+        comment: "e - forces the EUC-JP encoding"
+      - name: ASCII_8BIT
+        comment: "n - forces the ASCII-8BIT encoding"
+      - name: WINDOWS_31J
+        comment: "s - forces the Windows-31J encoding"
+      - name: UTF_8
+        comment: "u - forces the UTF-8 encoding"
+      - name: FORCED_UTF8_ENCODING
+        comment: "internal bytes forced the encoding to UTF-8"
+      - name: FORCED_BINARY_ENCODING
+        comment: "internal bytes forced the encoding to binary"
+      - name: FORCED_US_ASCII_ENCODING
+        comment: "internal bytes forced the encoding to US-ASCII"
+    comment: Flags for regular expression and match last line nodes.
+  - name: ShareableConstantNodeFlags
+    values:
+      - name: LITERAL
+        comment: "constant writes that should be modified with shareable constant value literal"
+      - name: EXPERIMENTAL_EVERYTHING
+        comment: "constant writes that should be modified with shareable constant value experimental everything"
+      - name: EXPERIMENTAL_COPY
+        comment: "constant writes that should be modified with shareable constant value experimental copy"
+    comment: Flags for shareable constant nodes.
+  - name: StringFlags
+    values:
+      - name: FORCED_UTF8_ENCODING
+        comment: "internal bytes forced the encoding to UTF-8"
+      - name: FORCED_BINARY_ENCODING
+        comment: "internal bytes forced the encoding to binary"
+      - name: FROZEN
+        comment: "frozen by virtue of a `frozen_string_literal: true` comment or `--enable-frozen-string-literal`"
+      - name: MUTABLE
+        comment: "mutable by virtue of a `frozen_string_literal: false` comment or `--disable-frozen-string-literal`"
+    comment: Flags for string nodes.
+  - name: SymbolFlags
+    values:
+      - name: FORCED_UTF8_ENCODING
+        comment: "internal bytes forced the encoding to UTF-8"
+      - name: FORCED_BINARY_ENCODING
+        comment: "internal bytes forced the encoding to binary"
+      - name: FORCED_US_ASCII_ENCODING
+        comment: "internal bytes forced the encoding to US-ASCII"
+    comment: Flags for symbol nodes.
+nodes:
+  - name: AliasGlobalVariableNode
+    fields:
+      - name: new_name
+        type: node
+        kind:
+          - GlobalVariableReadNode
+          - BackReferenceReadNode
+          - NumberedReferenceReadNode
+        comment: |
+          Represents the new name of the global variable that can be used after aliasing.
+
+              alias $foo $bar
+                    ^^^^
+      - name: old_name
+        type: node
+        kind:
+          - GlobalVariableReadNode
+          - BackReferenceReadNode
+          - NumberedReferenceReadNode
+          - on error: SymbolNode # alias $a b
+          - on error: MissingNode # alias $a 42
+        comment: |
+          Represents the old name of the global variable that can be used before aliasing.
+
+              alias $foo $bar
+                         ^^^^
+      - name: keyword_loc
+        type: location
+        comment: |
+          The location of the `alias` keyword.
+
+              alias $foo $bar
+              ^^^^^
+    comment: |
+      Represents the use of the `alias` keyword to alias a global variable.
+
+          alias $foo $bar
+          ^^^^^^^^^^^^^^^
+  - name: AliasMethodNode
+    fields:
+      - name: new_name
+        type: node
+        kind:
+          - SymbolNode
+          - InterpolatedSymbolNode
+        comment: |
+          Represents the new name of the method that will be aliased.
+
+              alias foo bar
+                    ^^^
+
+              alias :foo :bar
+                    ^^^^
+
+              alias :"#{foo}" :"#{bar}"
+                    ^^^^^^^^^
+      - name: old_name
+        type: node
+        kind:
+          - SymbolNode
+          - InterpolatedSymbolNode
+          - on error: GlobalVariableReadNode # alias a $b
+          - on error: MissingNode # alias a 42
+        comment: |
+          Represents the old name of the method that will be aliased.
+
+              alias foo bar
+                        ^^^
+
+              alias :foo :bar
+                         ^^^^
+
+              alias :"#{foo}" :"#{bar}"
+                              ^^^^^^^^^
+      - name: keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `alias` keyword.
+
+              alias foo bar
+              ^^^^^
+    comment: |
+      Represents the use of the `alias` keyword to alias a method.
+
+          alias foo bar
+          ^^^^^^^^^^^^^
+  - name: AlternationPatternNode
+    fields:
+      - name: left
+        type: node
+        kind: pattern expression
+        comment: |
+          Represents the left side of the expression.
+
+              foo => bar | baz
+                     ^^^
+      - name: right
+        type: node
+        kind: pattern expression
+        comment: |
+          Represents the right side of the expression.
+
+              foo => bar | baz
+                           ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the alternation operator location.
+
+              foo => bar | baz
+                         ^
+    comment: |
+      Represents an alternation pattern in pattern matching.
+
+          foo => bar | baz
+                 ^^^^^^^^^
+  - name: AndNode
+    fields:
+      - name: left
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the left side of the expression. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              left and right
+              ^^^^
+
+              1 && 2
+              ^
+      - name: right
+        type: node
+        kind: Node
+        comment: |
+          Represents the right side of the expression.
+
+              left && right
+                      ^^^^^
+
+              1 and 2
+                    ^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `and` keyword or the `&&` operator.
+
+              left and right
+                   ^^^
+    comment: |
+      Represents the use of the `&&` operator or the `and` keyword.
+
+          left and right
+          ^^^^^^^^^^^^^^
+  - name: ArgumentsNode
+    flags: ArgumentsNodeFlags
+    fields:
+      - name: arguments
+        type: node[]
+        kind: non-void expression
+        comment: |
+          The list of arguments, if present. These can be any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo(bar, baz)
+                  ^^^^^^^^
+    comment: |
+      Represents a set of arguments to a method or a keyword.
+
+          return foo, bar, baz
+                 ^^^^^^^^^^^^^
+  - name: ArrayNode
+    flags: ArrayNodeFlags
+    fields:
+      - name: elements
+        type: node[]
+        kind: non-void expression
+        comment: Represents the list of zero or more [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression) within the array.
+      - name: opening_loc
+        type: location?
+        comment: |
+          Represents the optional source location for the opening token.
+
+              [1,2,3]                 # "["
+              %w[foo bar baz]         # "%w["
+              %I(apple orange banana) # "%I("
+              foo = 1, 2, 3           # nil
+      - name: closing_loc
+        type: location?
+        comment: |
+          Represents the optional source location for the closing token.
+
+              [1,2,3]                 # "]"
+              %w[foo bar baz]         # "]"
+              %I(apple orange banana) # ")"
+              foo = 1, 2, 3           # nil
+    comment: |
+      Represents an array literal. This can be a regular array using brackets or a special array using % like %w or %i.
+
+          [1, 2, 3]
+          ^^^^^^^^^
+  - name: ArrayPatternNode
+    fields:
+      - name: constant
+        type: node?
+        kind:
+          - ConstantPathNode
+          - ConstantReadNode
+        comment: |
+          Represents the optional constant preceding the array pattern.
+
+              foo in Bar[]
+                     ^^^
+
+              foo in Bar[1, 2, 3]
+                     ^^^
+
+              foo in Bar::Baz[1, 2, 3]
+                     ^^^^^^^^
+      - name: requireds
+        type: node[]
+        kind: pattern expression
+        comment: |
+          Represents the required elements of the array pattern.
+
+              foo in [1, 2]
+                      ^  ^
+      - name: rest
+        type: node?
+        kind: pattern expression
+        comment: |
+          Represents the rest element of the array pattern.
+
+              foo in *bar
+                     ^^^^
+      - name: posts
+        type: node[]
+        kind: pattern expression
+        comment: |
+          Represents the elements after the rest element of the array pattern.
+
+              foo in *bar, baz
+                           ^^^
+      - name: opening_loc
+        type: location?
+        comment: |
+          Represents the opening location of the array pattern.
+
+              foo in [1, 2]
+                     ^
+      - name: closing_loc
+        type: location?
+        comment: |
+          Represents the closing location of the array pattern.
+
+              foo in [1, 2]
+                          ^
+    comment: |
+      Represents an array pattern in pattern matching.
+
+          foo in 1, 2
+          ^^^^^^^^^^^
+
+          foo in [1, 2]
+          ^^^^^^^^^^^^^
+
+          foo in *bar
+          ^^^^^^^^^^^
+
+          foo in Bar[]
+          ^^^^^^^^^^^^
+
+          foo in Bar[1, 2, 3]
+          ^^^^^^^^^^^^^^^^^^^
+  - name: AssocNode
+    fields:
+      - name: key
+        type: node
+        kind: non-void expression
+        comment: |
+          The key of the association. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              { a: b }
+                ^
+
+              { foo => bar }
+                ^^^
+
+              { def a; end => 1 }
+                ^^^^^^^^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value of the association, if present. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              { foo => bar }
+                       ^^^
+
+              { x: 1 }
+                   ^
+      - name: operator_loc
+        type: location?
+        comment: |
+          The location of the `=>` operator, if present.
+
+              { foo => bar }
+                    ^^
+    comment: |
+      Represents a hash key/value pair.
+
+          { a => b }
+            ^^^^^^
+  - name: AssocSplatNode
+    fields:
+      - name: value
+        type: node?
+        kind: non-void expression
+        comment: |
+          The value to be splatted, if present. Will be missing when keyword rest argument forwarding is used.
+
+              { **foo }
+                  ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `**` operator.
+
+              { **x }
+                ^^
+    comment: |
+      Represents a splat in a hash literal.
+
+          { **foo }
+            ^^^^^
+  - name: BackReferenceReadNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the back-reference variable, including the leading `$`.
+
+              $& # name `:$&`
+
+              $+ # name `:$+`
+    comment: |
+      Represents reading a reference to a field in the previous match.
+
+          $'
+          ^^
+  - name: BeginNode
+    fields:
+      - name: begin_keyword_loc
+        type: location?
+        comment: |
+          Represents the location of the `begin` keyword.
+
+              begin x end
+              ^^^^^
+      - name: statements
+        type: node?
+        kind: StatementsNode
+        comment: |
+          Represents the statements within the begin block.
+
+              begin x end
+                    ^
+      - name: rescue_clause
+        type: node?
+        kind: RescueNode
+        comment: |
+          Represents the rescue clause within the begin block.
+
+              begin x; rescue y; end
+                       ^^^^^^^^
+      - name: else_clause
+        type: node?
+        kind: ElseNode
+        comment: |
+          Represents the else clause within the begin block.
+
+              begin x; rescue y; else z; end
+                                 ^^^^^^
+      - name: ensure_clause
+        type: node?
+        kind: EnsureNode
+        comment: |
+          Represents the ensure clause within the begin block.
+
+              begin x; ensure y; end
+                       ^^^^^^^^
+      - name: end_keyword_loc
+        type: location?
+        comment: |
+          Represents the location of the `end` keyword.
+
+              begin x end
+                      ^^^
+    newline: false
+    comment: |
+      Represents a begin statement.
+
+          begin
+            foo
+          end
+          ^^^^^
+  - name: BlockArgumentNode
+    fields:
+      - name: expression
+        type: node?
+        kind: non-void expression
+        comment: |
+          The expression that is being passed as a block argument. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo(&args)
+                  ^^^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the location of the `&` operator.
+
+              foo(&args)
+                  ^
+    comment: |
+      Represents a block argument using `&`.
+
+          bar(&args)
+          ^^^^^^^^^^
+  - name: BlockLocalVariableNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the block local variable.
+
+              a { |; b| } # name `:b`
+                     ^
+    comment: |
+      Represents a block local variable.
+
+          a { |; b| }
+                 ^
+  - name: BlockNode
+    fields:
+      - name: locals
+        type: constant[]
+        comment: |
+          The local variables declared in the block.
+
+              [1, 2, 3].each { |i| puts x } # locals: [:i]
+                                ^
+      - name: parameters
+        type: node?
+        kind:
+          - BlockParametersNode
+          - NumberedParametersNode
+          - ItParametersNode
+        comment: |
+          The parameters of the block.
+
+              [1, 2, 3].each { |i| puts x }
+                               ^^^
+              [1, 2, 3].each { puts _1 }
+                             ^^^^^^^^^^^
+              [1, 2, 3].each { puts it }
+                             ^^^^^^^^^^^
+      - name: body
+        type: node?
+        kind:
+          - StatementsNode
+          - BeginNode
+        comment: |
+          The body of the block.
+
+              [1, 2, 3].each { |i| puts x }
+                                   ^^^^^^
+      - name: opening_loc
+        type: location
+        comment: |
+          Represents the location of the opening `{` or `do`.
+
+              [1, 2, 3].each { |i| puts x }
+                             ^
+      - name: closing_loc
+        type: location
+        comment: |
+          Represents the location of the closing `}` or `end`.
+
+              [1, 2, 3].each { |i| puts x }
+                                          ^
+    comment: |
+      Represents a block of ruby code.
+
+          [1, 2, 3].each { |i| puts x }
+                         ^^^^^^^^^^^^^^
+  - name: BlockParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant?
+        comment: |
+          The name of the block parameter.
+
+              def a(&b) # name `:b`
+                     ^
+              end
+      - name: name_loc
+        type: location?
+        comment: |
+          Represents the location of the block parameter name.
+
+              def a(&b)
+                     ^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the location of the `&` operator.
+
+              def a(&b)
+                    ^
+              end
+    comment: |
+      Represents a block parameter of a method, block, or lambda definition.
+
+          def a(&b)
+                ^^
+          end
+  - name: BlockParametersNode
+    fields:
+      - name: parameters
+        type: node?
+        kind: ParametersNode
+        comment: |
+          Represents the parameters of the block.
+
+              -> (a, b = 1; local) { }
+                  ^^^^^^^^
+
+              foo do |a, b = 1; local|
+                      ^^^^^^^^
+              end
+      - name: locals
+        type: node[]
+        kind: BlockLocalVariableNode
+        comment: |
+          Represents the local variables of the block.
+
+              -> (a, b = 1; local) { }
+                            ^^^^^
+
+              foo do |a, b = 1; local|
+                                ^^^^^
+              end
+      - name: opening_loc
+        type: location?
+        comment: |
+          Represents the opening location of the block parameters.
+
+              -> (a, b = 1; local) { }
+                 ^
+
+              foo do |a, b = 1; local|
+                     ^
+              end
+      - name: closing_loc
+        type: location?
+        comment: |
+          Represents the closing location of the block parameters.
+
+              -> (a, b = 1; local) { }
+                                 ^
+
+              foo do |a, b = 1; local|
+                                     ^
+              end
+    comment: |
+      Represents a block's parameters declaration.
+
+          -> (a, b = 1; local) { }
+             ^^^^^^^^^^^^^^^^^
+
+          foo do |a, b = 1; local|
+                 ^^^^^^^^^^^^^^^^^
+          end
+  - name: BreakNode
+    fields:
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+        comment: |
+          The arguments to the break statement, if present. These can be any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              break foo
+                    ^^^
+      - name: keyword_loc
+        type: location
+        comment: |
+          The location of the `break` keyword.
+
+              break foo
+              ^^^^^
+    comment: |
+      Represents the use of the `break` keyword.
+
+          break foo
+          ^^^^^^^^^
+  - name: CallAndWriteNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+        comment: |
+          The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo.bar &&= value
+              ^^^
+      - name: call_operator_loc
+        type: location?
+        comment: |
+          Represents the location of the call operator.
+
+              foo.bar &&= value
+                 ^
+      - name: message_loc
+        type: location?
+        comment: |
+          Represents the location of the message.
+
+              foo.bar &&= value
+                  ^^^
+      - name: read_name
+        type: constant
+        comment: |
+          Represents the name of the method being called.
+
+              foo.bar &&= value # read_name `:bar`
+                  ^^^
+      - name: write_name
+        type: constant
+        comment: |
+          Represents the name of the method being written to.
+
+              foo.bar &&= value # write_name `:bar=`
+                  ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the location of the operator.
+
+              foo.bar &&= value
+                      ^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the value being assigned.
+
+              foo.bar &&= value
+                          ^^^^^
+    comment: |
+      Represents the use of the `&&=` operator on a call.
+
+          foo.bar &&= value
+          ^^^^^^^^^^^^^^^^^
+  - name: CallNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+        comment: |
+          The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo.bar
+              ^^^
+
+              +foo
+               ^^^
+
+              foo + bar
+              ^^^
+      - name: call_operator_loc
+        type: location?
+        comment: |
+          Represents the location of the call operator.
+
+              foo.bar
+                 ^
+
+              foo&.bar
+                 ^^
+      - name: name
+        type: constant
+        comment: |
+          Represents the name of the method being called.
+
+              foo.bar # name `:bar`
+                  ^^^
+      - name: message_loc
+        type: location?
+        comment: |
+          Represents the location of the message.
+
+              foo.bar
+                  ^^^
+      - name: opening_loc
+        type: location?
+        comment: |
+          Represents the location of the left parenthesis.
+
+              foo(bar)
+                 ^
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+        comment: |
+          Represents the arguments to the method call. These can be any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo(bar)
+                  ^^^
+      - name: closing_loc
+        type: location?
+        comment: |
+          Represents the location of the right parenthesis.
+
+              foo(bar)
+                     ^
+      - name: equal_loc
+        type: location?
+        comment: |
+          Represents the location of the equal sign, in the case that this is an attribute write.
+
+              foo.bar = value
+                      ^
+
+              foo[bar] = value
+                       ^
+      - name: block
+        type: node?
+        kind:
+          - BlockNode
+          - BlockArgumentNode
+        comment: |
+          Represents the block that is being passed to the method.
+
+              foo { |a| a }
+                  ^^^^^^^^^
+    comment: |
+      Represents a method call, in all of the various forms that can take.
+
+          foo
+          ^^^
+
+          foo()
+          ^^^^^
+
+          +foo
+          ^^^^
+
+          foo + bar
+          ^^^^^^^^^
+
+          foo.bar
+          ^^^^^^^
+
+          foo&.bar
+          ^^^^^^^^
+  - name: CallOperatorWriteNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+        comment: |
+          The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo.bar += value
+              ^^^
+      - name: call_operator_loc
+        type: location?
+        comment: |
+          Represents the location of the call operator.
+
+              foo.bar += value
+                 ^
+      - name: message_loc
+        type: location?
+        comment: |
+          Represents the location of the message.
+
+              foo.bar += value
+                  ^^^
+      - name: read_name
+        type: constant
+        comment: |
+          Represents the name of the method being called.
+
+              foo.bar += value # read_name `:bar`
+                  ^^^
+      - name: write_name
+        type: constant
+        comment: |
+          Represents the name of the method being written to.
+
+              foo.bar += value # write_name `:bar=`
+                  ^^^
+      - name: binary_operator
+        type: constant
+        comment: |
+          Represents the binary operator being used.
+
+              foo.bar += value # binary_operator `:+`
+                      ^
+      - name: binary_operator_loc
+        type: location
+        comment: |
+          Represents the location of the binary operator.
+
+              foo.bar += value
+                      ^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the value being assigned.
+
+              foo.bar += value
+                         ^^^^^
+    comment: |
+      Represents the use of an assignment operator on a call.
+
+          foo.bar += baz
+          ^^^^^^^^^^^^^^
+  - name: CallOrWriteNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+        comment: |
+          The object that the method is being called on. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo.bar ||= value
+              ^^^
+      - name: call_operator_loc
+        type: location?
+        comment: |
+          Represents the location of the call operator.
+
+              foo.bar ||= value
+                 ^
+      - name: message_loc
+        type: location?
+        comment: |
+          Represents the location of the message.
+
+              foo.bar ||= value
+                  ^^^
+      - name: read_name
+        type: constant
+        comment: |
+          Represents the name of the method being called.
+
+              foo.bar ||= value # read_name `:bar`
+                  ^^^
+      - name: write_name
+        type: constant
+        comment: |
+          Represents the name of the method being written to.
+
+              foo.bar ||= value # write_name `:bar=`
+                  ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the location of the operator.
+
+              foo.bar ||= value
+                      ^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the value being assigned.
+
+              foo.bar ||= value
+                          ^^^^^
+    comment: |
+      Represents the use of the `||=` operator on a call.
+
+          foo.bar ||= value
+          ^^^^^^^^^^^^^^^^^
+  - name: CallTargetNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node
+        kind: non-void expression
+        comment: |
+          The object that the method is being called on. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo.bar = 1
+              ^^^
+      - name: call_operator_loc
+        type: location
+        comment: |
+          Represents the location of the call operator.
+
+              foo.bar = 1
+                 ^
+      - name: name
+        type: constant
+        comment: |
+          Represents the name of the method being called.
+
+              foo.bar = 1 # name `:bar=`
+                  ^^^
+      - name: message_loc
+        type: location
+        comment: |
+          Represents the location of the message.
+
+              foo.bar = 1
+                  ^^^
+    comment: |
+      Represents assigning to a method call.
+
+          foo.bar, = 1
+          ^^^^^^^
+
+          begin
+          rescue => foo.bar
+                    ^^^^^^^
+          end
+
+          for foo.bar in baz do end
+              ^^^^^^^
+  - name: CapturePatternNode
+    fields:
+      - name: value
+        type: node
+        kind: pattern expression
+        comment: |
+          Represents the value to capture.
+
+              foo => bar
+              ^^^
+      - name: target
+        type: node
+        kind: LocalVariableTargetNode
+        comment: |
+          Represents the target of the capture.
+
+              foo => bar
+                     ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the location of the `=>` operator.
+
+              foo => bar
+                  ^^
+    comment: |
+      Represents assigning to a local variable in pattern matching.
+
+          foo => [bar => baz]
+                 ^^^^^^^^^^^^
+  - name: CaseMatchNode
+    fields:
+      - name: predicate
+        type: node?
+        kind: non-void expression
+        comment: |
+          Represents the predicate of the case match. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              case true; in false; end
+                   ^^^^
+      - name: conditions
+        type: node[]
+        kind: InNode
+        comment: |
+          Represents the conditions of the case match.
+
+              case true; in false; end
+                         ^^^^^^^^
+      - name: else_clause
+        type: node?
+        kind: ElseNode
+        comment: |
+          Represents the else clause of the case match.
+
+              case true; in false; else; end
+                                   ^^^^
+      - name: case_keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `case` keyword.
+
+              case true; in false; end
+              ^^^^
+      - name: end_keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `end` keyword.
+
+              case true; in false; end
+                                   ^^^
+    comment: |
+      Represents the use of a case statement for pattern matching.
+
+          case true
+          in false
+          end
+          ^^^^^^^^^
+  - name: CaseNode
+    fields:
+      - name: predicate
+        type: node?
+        kind: non-void expression
+        comment: |
+          Represents the predicate of the case statement. This can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              case true; when false; end
+                   ^^^^
+      - name: conditions
+        type: node[]
+        kind: WhenNode
+        comment: |
+          Represents the conditions of the case statement.
+
+              case true; when false; end
+                         ^^^^^^^^^^
+      - name: else_clause
+        type: node?
+        kind: ElseNode
+        comment: |
+          Represents the else clause of the case statement.
+
+              case true; when false; else; end
+                                     ^^^^
+      - name: case_keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `case` keyword.
+
+              case true; when false; end
+              ^^^^
+      - name: end_keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `end` keyword.
+
+              case true; when false; end
+                                     ^^^
+    comment: |
+      Represents the use of a case statement.
+
+          case true
+          when false
+          end
+          ^^^^^^^^^^
+  - name: ClassNode
+    fields:
+      - name: locals
+        type: constant[]
+      - name: class_keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `class` keyword.
+
+              class Foo end
+              ^^^^^
+      - name: constant_path
+        type: node
+        kind:
+          - ConstantReadNode
+          - ConstantPathNode
+          - on error: CallNode # class 0.X end
+      - name: inheritance_operator_loc
+        type: location?
+        comment: |
+          Represents the location of the `<` operator.
+
+              class Foo < Bar
+                        ^
+      - name: superclass
+        type: node?
+        kind: non-void expression
+        comment: |
+          Represents the superclass of the class.
+
+              class Foo < Bar
+                          ^^^
+      - name: body
+        type: node?
+        kind:
+          - StatementsNode
+          - BeginNode
+        comment: |
+          Represents the body of the class.
+
+              class Foo
+                foo
+                ^^^
+      - name: end_keyword_loc
+        type: location
+        comment: |
+          Represents the location of the `end` keyword.
+
+              class Foo end
+                        ^^^
+      - name: name
+        type: constant
+        comment: |
+          The name of the class.
+
+              class Foo end # name `:Foo`
+    comment: |
+      Represents a class declaration involving the `class` keyword.
+
+          class Foo end
+          ^^^^^^^^^^^^^
+  - name: ClassVariableAndWriteNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the class variable, which is a `@@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              @@target &&= value # name `:@@target`
+              ^^^^^^^^
+      - name: name_loc
+        type: location
+        comment: |
+          Represents the location of the variable name.
+
+              @@target &&= value
+              ^^^^^^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          Represents the location of the `&&=` operator.
+
+              @@target &&= value
+                       ^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the value being assigned. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              @@target &&= value
+                           ^^^^^
+    comment: |
+      Represents the use of the `&&=` operator for assignment to a class variable.
+
+          @@target &&= value
+          ^^^^^^^^^^^^^^^^^^
+  - name: ClassVariableOperatorWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: binary_operator
+        type: constant
+    comment: |
+      Represents assigning to a class variable using an operator that isn't `=`.
+
+          @@target += value
+          ^^^^^^^^^^^^^^^^^
+  - name: ClassVariableOrWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `||=` operator for assignment to a class variable.
+
+          @@target ||= value
+          ^^^^^^^^^^^^^^^^^^
+  - name: ClassVariableReadNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the class variable, which is a `@@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              @@abc   # name `:@@abc`
+
+              @@_test # name `:@@_test`
+    comment: |
+      Represents referencing a class variable.
+
+          @@foo
+          ^^^^^
+  - name: ClassVariableTargetNode
+    fields:
+      - name: name
+        type: constant
+    comment: |
+      Represents writing to a class variable in a context that doesn't have an explicit value.
+
+          @@foo, @@bar = baz
+          ^^^^^  ^^^^^
+  - name: ClassVariableWriteNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the class variable, which is a `@@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              @@abc = 123     # name `:@@abc`
+
+              @@_test = :test # name `:@@_test`
+      - name: name_loc
+        type: location
+        comment: |
+          The location of the variable name.
+
+              @@foo = :bar
+              ^^^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the class variable. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              @@foo = :bar
+                      ^^^^
+
+              @@_xyz = 123
+                       ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `=` operator.
+
+              @@foo = :bar
+                    ^
+    comment: |
+      Represents writing to a class variable.
+
+          @@foo = 1
+          ^^^^^^^^^
+  - name: ConstantAndWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `&&=` operator for assignment to a constant.
+
+          Target &&= value
+          ^^^^^^^^^^^^^^^^
+  - name: ConstantOperatorWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: binary_operator
+        type: constant
+    comment: |
+      Represents assigning to a constant using an operator that isn't `=`.
+
+          Target += value
+          ^^^^^^^^^^^^^^^
+  - name: ConstantOrWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `||=` operator for assignment to a constant.
+
+          Target ||= value
+          ^^^^^^^^^^^^^^^^
+  - name: ConstantPathAndWriteNode
+    fields:
+      - name: target
+        type: node
+        kind: ConstantPathNode
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `&&=` operator for assignment to a constant path.
+
+          Parent::Child &&= value
+          ^^^^^^^^^^^^^^^^^^^^^^^
+  - name: ConstantPathNode
+    fields:
+      - name: parent
+        type: node?
+        kind: non-void expression
+        comment: |
+          The left-hand node of the path, if present. It can be `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression). It will be `nil` when the constant lookup is at the root of the module tree.
+
+              Foo::Bar
+              ^^^
+
+              self::Test
+              ^^^^
+
+              a.b::C
+              ^^^
+      - name: name
+        type: constant?
+        comment: The name of the constant being accessed. This could be `nil` in the event of a syntax error.
+      - name: delimiter_loc
+        type: location
+        comment: |
+          The location of the `::` delimiter.
+
+              ::Foo
+              ^^
+
+              One::Two
+                 ^^
+      - name: name_loc
+        type: location
+        comment: |
+          The location of the name of the constant.
+
+              ::Foo
+                ^^^
+
+              One::Two
+                   ^^^
+    comment: |
+      Represents accessing a constant through a path of `::` operators.
+
+          Foo::Bar
+          ^^^^^^^^
+  - name: ConstantPathOperatorWriteNode
+    fields:
+      - name: target
+        type: node
+        kind: ConstantPathNode
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: binary_operator
+        type: constant
+    comment: |
+      Represents assigning to a constant path using an operator that isn't `=`.
+
+          Parent::Child += value
+          ^^^^^^^^^^^^^^^^^^^^^^
+  - name: ConstantPathOrWriteNode
+    fields:
+      - name: target
+        type: node
+        kind: ConstantPathNode
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `||=` operator for assignment to a constant path.
+
+          Parent::Child ||= value
+          ^^^^^^^^^^^^^^^^^^^^^^^
+  - name: ConstantPathTargetNode
+    fields:
+      - name: parent
+        type: node?
+        kind: non-void expression
+      - name: name
+        type: constant?
+      - name: delimiter_loc
+        type: location
+      - name: name_loc
+        type: location
+    comment: |
+      Represents writing to a constant path in a context that doesn't have an explicit value.
+
+          Foo::Foo, Bar::Bar = baz
+          ^^^^^^^^  ^^^^^^^^
+  - name: ConstantPathWriteNode
+    fields:
+      - name: target
+        type: node
+        kind: ConstantPathNode
+        comment: |
+          A node representing the constant path being written to.
+
+              Foo::Bar = 1
+              ^^^^^^^^
+
+              ::Foo = :abc
+              ^^^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `=` operator.
+
+              ::ABC = 123
+                    ^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the constant path. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              FOO::BAR = :abc
+                         ^^^^
+    comment: |
+      Represents writing to a constant path.
+
+          ::Foo = 1
+          ^^^^^^^^^
+
+          Foo::Bar = 1
+          ^^^^^^^^^^^^
+
+          ::Foo::Bar = 1
+          ^^^^^^^^^^^^^^
+  - name: ConstantReadNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the [constant](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#constants).
+
+              X              # name `:X`
+
+              SOME_CONSTANT  # name `:SOME_CONSTANT`
+    comment: |
+      Represents referencing a constant.
+
+          Foo
+          ^^^
+  - name: ConstantTargetNode
+    fields:
+      - name: name
+        type: constant
+    comment: |
+      Represents writing to a constant in a context that doesn't have an explicit value.
+
+          Foo, Bar = baz
+          ^^^  ^^^
+  - name: ConstantWriteNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the [constant](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#constants).
+
+              Foo = :bar # name `:Foo`
+
+              XYZ = 1    # name `:XYZ`
+      - name: name_loc
+        type: location
+        comment: |
+          The location of the constant name.
+
+              FOO = 1
+              ^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the constant. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              FOO = :bar
+                    ^^^^
+
+              MyClass = Class.new
+                        ^^^^^^^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `=` operator.
+
+              FOO = :bar
+                  ^
+    comment: |
+      Represents writing to a constant.
+
+          Foo = 1
+          ^^^^^^^
+  - name: DefNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: receiver
+        type: node?
+        kind: non-void expression
+      - name: parameters
+        type: node?
+        kind: ParametersNode
+      - name: body
+        type: node?
+        kind:
+          - StatementsNode
+          - BeginNode
+      - name: locals
+        type: constant[]
+      - name: def_keyword_loc
+        type: location
+      - name: operator_loc
+        type: location?
+      - name: lparen_loc
+        type: location?
+      - name: rparen_loc
+        type: location?
+      - name: equal_loc
+        type: location?
+      - name: end_keyword_loc
+        type: location?
+    comment: |
+      Represents a method definition.
+
+          def method
+          end
+          ^^^^^^^^^^
+  - name: DefinedNode
+    fields:
+      - name: lparen_loc
+        type: location?
+      - name: value
+        type: node
+        kind: Node # More than non-void expression as defined?(return) is allowed, yet defined?(BEGIN{}) is SyntaxError
+      - name: rparen_loc
+        type: location?
+      - name: keyword_loc
+        type: location
+    comment: |
+      Represents the use of the `defined?` keyword.
+
+          defined?(a)
+          ^^^^^^^^^^^
+  - name: ElseNode
+    fields:
+      - name: else_keyword_loc
+        type: location
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: end_keyword_loc
+        type: location?
+    comment: |
+      Represents an `else` clause in a `case`, `if`, or `unless` statement.
+
+          if a then b else c end
+                      ^^^^^^^^^^
+  - name: EmbeddedStatementsNode
+    fields:
+      - name: opening_loc
+        type: location
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: closing_loc
+        type: location
+    comment: |
+      Represents an interpolated set of statements.
+
+          "foo #{bar}"
+               ^^^^^^
+  - name: EmbeddedVariableNode
+    fields:
+      - name: operator_loc
+        type: location
+      - name: variable
+        type: node
+        kind:
+          - InstanceVariableReadNode
+          - ClassVariableReadNode
+          - GlobalVariableReadNode
+          - BackReferenceReadNode
+          - NumberedReferenceReadNode
+    comment: |
+      Represents an interpolated variable.
+
+          "foo #@bar"
+               ^^^^^
+  - name: EnsureNode
+    fields:
+      - name: ensure_keyword_loc
+        type: location
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: end_keyword_loc
+        type: location
+    comment: |
+      Represents an `ensure` clause in a `begin` statement.
+
+          begin
+            foo
+          ensure
+          ^^^^^^
+            bar
+          end
+  - name: FalseNode
+    comment: |
+      Represents the use of the literal `false` keyword.
+
+          false
+          ^^^^^
+  - name: FindPatternNode
+    fields:
+      - name: constant
+        type: node?
+        kind:
+          - ConstantPathNode
+          - ConstantReadNode
+        comment: |
+          Represents the optional constant preceding the pattern.
+
+              foo in Foo(*bar, baz, *qux)
+                     ^^^
+      - name: left
+        type: node
+        kind: SplatNode
+        comment: |
+          Represents the first wildcard node in the pattern.
+
+              foo in *bar, baz, *qux
+                     ^^^^
+
+              foo in Foo(*bar, baz, *qux)
+                         ^^^^
+      - name: requireds
+        type: node[]
+        kind: pattern expression
+        comment: |
+          Represents the nodes in between the wildcards.
+
+              foo in *bar, baz, *qux
+                           ^^^
+
+              foo in Foo(*bar, baz, 1, *qux)
+                               ^^^^^^
+      - name: right
+        type: node
+        kind:
+          - SplatNode
+          - on error: MissingNode
+        comment: |
+          Represents the second wildcard node in the pattern.
+
+              foo in *bar, baz, *qux
+                                ^^^^
+
+              foo in Foo(*bar, baz, *qux)
+                                    ^^^^
+      - name: opening_loc
+        type: location?
+        comment: |
+          The location of the opening bracket or parenthesis, if present.
+
+              foo in [*bar, baz, *qux]
+                     ^
+
+              foo in Foo(*bar, baz, *qux)
+                        ^
+      - name: closing_loc
+        type: location?
+        comment: |
+          The location of the closing bracket or parenthesis, if present.
+
+              foo in [*bar, baz, *qux]
+                                     ^
+
+              foo in Foo(*bar, baz, *qux)
+                                        ^
+    comment: |
+      Represents a find pattern in pattern matching.
+
+          foo in *bar, baz, *qux
+                 ^^^^^^^^^^^^^^^
+
+          foo in [*bar, baz, *qux]
+                 ^^^^^^^^^^^^^^^^^
+
+          foo in Foo(*bar, baz, *qux)
+                 ^^^^^^^^^^^^^^^^^^^^
+
+          foo => *bar, baz, *qux
+                 ^^^^^^^^^^^^^^^
+  - name: FlipFlopNode
+    flags: RangeFlags
+    fields:
+      - name: left
+        type: node?
+        kind: non-void expression
+      - name: right
+        type: node?
+        kind: non-void expression
+      - name: operator_loc
+        type: location
+    comment: |
+      Represents the use of the `..` or `...` operators to create flip flops.
+
+          baz if foo .. bar
+                 ^^^^^^^^^^
+  - name: FloatNode
+    fields:
+      - name: value
+        type: double
+        comment: The value of the floating point number as a Float.
+    comment: |
+      Represents a floating point number literal.
+
+          1.0
+          ^^^
+  - name: ForNode
+    fields:
+      - name: index
+        type: node
+        kind:
+          - LocalVariableTargetNode
+          - InstanceVariableTargetNode
+          - ClassVariableTargetNode
+          - GlobalVariableTargetNode
+          - ConstantTargetNode
+          - ConstantPathTargetNode
+          - CallTargetNode
+          - IndexTargetNode
+          - MultiTargetNode
+          - on error: BackReferenceReadNode # for $& in a end
+          - on error: NumberedReferenceReadNode # for $1 in a end
+          - on error: MissingNode # for in 1..10; end
+        comment: |
+          The index expression for `for` loops.
+
+              for i in a end
+                  ^
+      - name: collection
+        type: node
+        kind: non-void expression
+        comment: |
+          The collection to iterate over.
+
+              for i in a end
+                       ^
+      - name: statements
+        type: node?
+        kind: StatementsNode
+        comment: |
+          Represents the body of statements to execute for each iteration of the loop.
+
+              for i in a
+                foo(i)
+                ^^^^^^
+              end
+      - name: for_keyword_loc
+        type: location
+        comment: |
+          The location of the `for` keyword.
+
+              for i in a end
+              ^^^
+      - name: in_keyword_loc
+        type: location
+        comment: |
+          The location of the `in` keyword.
+
+              for i in a end
+                    ^^
+      - name: do_keyword_loc
+        type: location?
+        comment: |
+          The location of the `do` keyword, if present.
+
+              for i in a do end
+                         ^^
+      - name: end_keyword_loc
+        type: location
+        comment: |
+          The location of the `end` keyword.
+
+              for i in a end
+                         ^^^
+    comment: |
+      Represents the use of the `for` keyword.
+
+          for i in a end
+          ^^^^^^^^^^^^^^
+  - name: ForwardingArgumentsNode
+    comment: |
+      Represents forwarding all arguments to this method to another method.
+
+          def foo(...)
+            bar(...)
+                ^^^
+          end
+  - name: ForwardingParameterNode
+    comment: |
+      Represents the use of the forwarding parameter in a method, block, or lambda declaration.
+
+          def foo(...)
+                  ^^^
+          end
+  - name: ForwardingSuperNode
+    fields:
+      - name: block
+        type: node?
+        kind: BlockNode
+        comment: |
+          All other arguments are forwarded as normal, except the original block is replaced with the new block.
+    comment: |
+      Represents the use of the `super` keyword without parentheses or arguments, but which might have a block.
+
+          super
+          ^^^^^
+
+          super { 123 }
+          ^^^^^^^^^^^^^
+
+      If it has any other arguments, it would be a `SuperNode` instead.
+  - name: GlobalVariableAndWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `&&=` operator for assignment to a global variable.
+
+          $target &&= value
+          ^^^^^^^^^^^^^^^^^
+  - name: GlobalVariableOperatorWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: binary_operator
+        type: constant
+    comment: |
+      Represents assigning to a global variable using an operator that isn't `=`.
+
+          $target += value
+          ^^^^^^^^^^^^^^^^
+  - name: GlobalVariableOrWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `||=` operator for assignment to a global variable.
+
+          $target ||= value
+          ^^^^^^^^^^^^^^^^^
+  - name: GlobalVariableReadNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the global variable, which is a `$` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers). Alternatively, it can be one of the special global variables designated by a symbol.
+
+              $foo   # name `:$foo`
+
+              $_Test # name `:$_Test`
+    comment: |
+      Represents referencing a global variable.
+
+          $foo
+          ^^^^
+  - name: GlobalVariableTargetNode
+    fields:
+      - name: name
+        type: constant
+    comment: |
+      Represents writing to a global variable in a context that doesn't have an explicit value.
+
+          $foo, $bar = baz
+          ^^^^  ^^^^
+  - name: GlobalVariableWriteNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the global variable, which is a `$` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers). Alternatively, it can be one of the special global variables designated by a symbol.
+
+              $foo = :bar  # name `:$foo`
+
+              $_Test = 123 # name `:$_Test`
+      - name: name_loc
+        type: location
+        comment: |
+          The location of the global variable's name.
+
+              $foo = :bar
+              ^^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the global variable. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              $foo = :bar
+                     ^^^^
+
+              $-xyz = 123
+                      ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `=` operator.
+
+              $foo = :bar
+                   ^
+    comment: |
+      Represents writing to a global variable.
+
+          $foo = 1
+          ^^^^^^^^
+  - name: HashNode
+    fields:
+      - name: opening_loc
+        type: location
+        comment: |
+          The location of the opening brace.
+
+              { a => b }
+              ^
+      - name: elements
+        type: node[]
+        kind:
+          - AssocNode
+          - AssocSplatNode
+        comment: |
+          The elements of the hash. These can be either `AssocNode`s or `AssocSplatNode`s.
+
+              { a: b }
+                ^^^^
+
+              { **foo }
+                ^^^^^
+      - name: closing_loc
+        type: location
+        comment: |
+          The location of the closing brace.
+
+              { a => b }
+                       ^
+    comment: |
+      Represents a hash literal.
+
+          { a => b }
+          ^^^^^^^^^^
+  - name: HashPatternNode
+    fields:
+      - name: constant
+        type: node?
+        kind:
+          - ConstantPathNode
+          - ConstantReadNode
+        comment: |
+          Represents the optional constant preceding the Hash.
+
+              foo => Bar[a: 1, b: 2]
+                     ^^^
+
+              foo => Bar::Baz[a: 1, b: 2]
+                     ^^^^^^^^
+      - name: elements
+        type: node[]
+        kind: AssocNode
+        comment: |
+          Represents the explicit named hash keys and values.
+
+              foo => { a: 1, b:, ** }
+                       ^^^^^^^^
+      - name: rest
+        type: node?
+        kind:
+          - AssocSplatNode
+          - NoKeywordsParameterNode
+        comment: |
+          Represents the rest of the Hash keys and values. This can be named, unnamed, or explicitly forbidden via `**nil`; the last form results in a `NoKeywordsParameterNode`.
+
+              foo => { a: 1, b:, **c }
+                                 ^^^
+
+              foo => { a: 1, b:, ** }
+                                 ^^
+
+              foo => { a: 1, b:, **nil }
+                                 ^^^^^
+      - name: opening_loc
+        type: location?
+        comment: |
+          The location of the opening brace or bracket, if present.
+
+              foo => { a: 1 }
+                     ^
+
+              foo => Bar[a: 1]
+                        ^
+      - name: closing_loc
+        type: location?
+        comment: |
+          The location of the closing brace or bracket, if present.
+
+              foo => { a: 1 }
+                            ^
+
+              foo => Bar[a: 1]
+                             ^
+    comment: |
+      Represents a hash pattern in pattern matching.
+
+          foo => { a: 1, b: 2 }
+                 ^^^^^^^^^^^^^^
+
+          foo => { a: 1, b: 2, **c }
+                 ^^^^^^^^^^^^^^^^^^^
+
+          foo => Bar[a: 1, b: 2]
+                 ^^^^^^^^^^^^^^^
+
+          foo in { a: 1, b: 2 }
+                 ^^^^^^^^^^^^^^
+  - name: IfNode
+    fields:
+      - name: if_keyword_loc
+        type: location?
+        comment: |
+          The location of the `if` keyword if present.
+
+              bar if foo
+                  ^^
+
+          The `if_keyword_loc` field will be `nil` when the `IfNode` represents a ternary expression.
+      - name: predicate
+        type: node
+        kind: non-void expression
+        comment: |
+          The node for the condition the `IfNode` is testing.
+
+              if foo
+                 ^^^
+                bar
+              end
+
+              bar if foo
+                     ^^^
+
+              foo ? bar : baz
+              ^^^
+      - name: then_keyword_loc
+        type: location?
+        comment: |
+          The location of the `then` keyword (if present) or the `?` in a ternary expression, `nil` otherwise.
+
+              if foo then bar end
+                     ^^^^
+
+              a ? b : c
+                ^
+      - name: statements
+        type: node?
+        kind: StatementsNode
+        comment: |
+          Represents the body of statements that will be executed when the predicate is evaluated as truthy. Will be `nil` when no body is provided.
+
+              if foo
+                bar
+                ^^^
+                baz
+                ^^^
+              end
+      - name: subsequent
+        type: node?
+        kind:
+          - ElseNode
+          - IfNode
+        comment: |
+          Represents an `ElseNode` or an `IfNode` when there is an `else` or an `elsif` in the `if` statement.
+
+              if foo
+                bar
+              elsif baz
+              ^^^^^^^^^
+                qux
+                ^^^
+              end
+              ^^^
+
+              if foo then bar else baz end
+                              ^^^^^^^^^^^^
+      - name: end_keyword_loc
+        type: location?
+        comment: |
+          The location of the `end` keyword if present, `nil` otherwise.
+
+              if foo
+                bar
+              end
+              ^^^
+    newline: predicate
+    comment: |
+      Represents the use of the `if` keyword, either in the block form or the modifier form, or a ternary expression.
+
+          bar if foo
+          ^^^^^^^^^^
+
+          if foo then bar end
+          ^^^^^^^^^^^^^^^^^^^
+
+          foo ? bar : baz
+          ^^^^^^^^^^^^^^^
+  - name: ImaginaryNode
+    fields:
+      - name: numeric
+        type: node
+        kind:
+          - FloatNode
+          - IntegerNode
+          - RationalNode
+    comment: |
+      Represents an imaginary number literal.
+
+          1.0i
+          ^^^^
+  - name: ImplicitNode
+    fields:
+      - name: value
+        type: node
+        kind:
+          - LocalVariableReadNode
+          - CallNode
+          - ConstantReadNode
+          - LocalVariableTargetNode
+    comment: |
+      Represents a node that is implicitly being added to the tree but doesn't correspond directly to a node in the source.
+
+          { foo: }
+            ^^^^
+
+          { Foo: }
+            ^^^^
+
+          foo in { bar: }
+                   ^^^^
+  - name: ImplicitRestNode
+    comment: |
+      Represents using a trailing comma to indicate an implicit rest parameter.
+
+          foo { |bar,| }
+                    ^
+
+          foo in [bar,]
+                     ^
+
+          for foo, in bar do end
+                 ^
+
+          foo, = bar
+             ^
+  - name: InNode
+    fields:
+      - name: pattern
+        type: node
+        kind: pattern expression
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: in_loc
+        type: location
+      - name: then_loc
+        type: location?
+    comment: |
+      Represents the use of the `in` keyword in a case statement.
+
+          case a; in b then c end
+                  ^^^^^^^^^^^
+  - name: IndexAndWriteNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+      - name: call_operator_loc
+        type: location?
+      - name: opening_loc
+        type: location
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+      - name: closing_loc
+        type: location
+      - name: block
+        type: node?
+        kind: BlockArgumentNode # foo[&b] &&= value, only valid on Ruby < 3.4
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `&&=` operator on a call to the `[]` method.
+
+          foo.bar[baz] &&= value
+          ^^^^^^^^^^^^^^^^^^^^^^
+  - name: IndexOperatorWriteNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+      - name: call_operator_loc
+        type: location?
+      - name: opening_loc
+        type: location
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+      - name: closing_loc
+        type: location
+      - name: block
+        type: node?
+        kind: BlockArgumentNode # foo[&b] += value, only valid on Ruby < 3.4
+      - name: binary_operator
+        type: constant
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of an assignment operator on a call to `[]`.
+
+          foo.bar[baz] += value
+          ^^^^^^^^^^^^^^^^^^^^^
+  - name: IndexOrWriteNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node?
+        kind: non-void expression
+      - name: call_operator_loc
+        type: location?
+      - name: opening_loc
+        type: location
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+      - name: closing_loc
+        type: location
+      - name: block
+        type: node?
+        kind: BlockArgumentNode # foo[&b] ||= value, only valid on Ruby < 3.4
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `||=` operator on a call to `[]`.
+
+          foo.bar[baz] ||= value
+          ^^^^^^^^^^^^^^^^^^^^^^
+  - name: IndexTargetNode
+    flags: CallNodeFlags
+    fields:
+      - name: receiver
+        type: node
+        kind: non-void expression
+      - name: opening_loc
+        type: location
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+      - name: closing_loc
+        type: location
+      - name: block
+        type: node?
+        kind: BlockArgumentNode # foo[&b], = 1, only valid on Ruby < 3.4
+    comment: |
+      Represents assigning to an index.
+
+          foo[bar], = 1
+          ^^^^^^^^
+
+          begin
+          rescue => foo[bar]
+                    ^^^^^^^^
+          end
+
+          for foo[bar] in baz do end
+              ^^^^^^^^
+  - name: InstanceVariableAndWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `&&=` operator for assignment to an instance variable.
+
+          @target &&= value
+          ^^^^^^^^^^^^^^^^^
+  - name: InstanceVariableOperatorWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: binary_operator
+        type: constant
+    comment: |
+      Represents assigning to an instance variable using an operator that isn't `=`.
+
+          @target += value
+          ^^^^^^^^^^^^^^^^
+  - name: InstanceVariableOrWriteNode
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents the use of the `||=` operator for assignment to an instance variable.
+
+          @target ||= value
+          ^^^^^^^^^^^^^^^^^
+  - name: InstanceVariableReadNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the instance variable, which is a `@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              @x     # name `:@x`
+
+              @_test # name `:@_test`
+    comment: |
+      Represents referencing an instance variable.
+
+          @foo
+          ^^^^
+  - name: InstanceVariableTargetNode
+    fields:
+      - name: name
+        type: constant
+    comment: |
+      Represents writing to an instance variable in a context that doesn't have an explicit value.
+
+          @foo, @bar = baz
+          ^^^^  ^^^^
+  - name: InstanceVariableWriteNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the instance variable, which is a `@` followed by an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              @x = :y       # name `:@x`
+
+              @_foo = "bar" # name `:@_foo`
+      - name: name_loc
+        type: location
+        comment: |
+          The location of the variable name.
+
+              @_x = 1
+              ^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the instance variable. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              @foo = :bar
+                     ^^^^
+
+              @_x = 1234
+                    ^^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `=` operator.
+
+              @x = y
+                 ^
+    comment: |
+      Represents writing to an instance variable.
+
+          @foo = 1
+          ^^^^^^^^
+  - name: IntegerNode
+    flags: IntegerBaseFlags
+    fields:
+      - name: value
+        type: integer
+        comment: The value of the integer literal as a number.
+    comment: |
+      Represents an integer number literal.
+
+          1
+          ^
+  - name: InterpolatedMatchLastLineNode
+    flags: RegularExpressionFlags
+    fields:
+      - name: opening_loc
+        type: location
+      - name: parts
+        type: node[]
+        kind:
+          - StringNode
+          - EmbeddedStatementsNode
+          - EmbeddedVariableNode
+      - name: closing_loc
+        type: location
+    newline: parts
+    comment: |
+      Represents a regular expression literal that contains interpolation that is being used in the predicate of a conditional to implicitly match against the last line read by an IO object.
+
+          if /foo #{bar} baz/ then end
+             ^^^^^^^^^^^^^^^^
+  - name: InterpolatedRegularExpressionNode
+    flags: RegularExpressionFlags
+    fields:
+      - name: opening_loc
+        type: location
+      - name: parts
+        type: node[]
+        kind:
+          - StringNode
+          - EmbeddedStatementsNode
+          - EmbeddedVariableNode
+      - name: closing_loc
+        type: location
+    newline: parts
+    comment: |
+      Represents a regular expression literal that contains interpolation.
+
+          /foo #{bar} baz/
+          ^^^^^^^^^^^^^^^^
+  - name: InterpolatedStringNode
+    flags: InterpolatedStringNodeFlags
+    fields:
+      - name: opening_loc
+        type: location?
+      - name: parts
+        type: node[]
+        kind:
+          - StringNode
+          - EmbeddedStatementsNode
+          - EmbeddedVariableNode
+          - InterpolatedStringNode # `"a" "#{b}"`
+          - on error: XStringNode # `<<`FOO` "bar"
+          - on error: InterpolatedXStringNode
+          - on error: SymbolNode
+          - on error: InterpolatedSymbolNode
+      - name: closing_loc
+        type: location?
+    newline: parts
+    comment: |
+      Represents a string literal that contains interpolation.
+
+          "foo #{bar} baz"
+          ^^^^^^^^^^^^^^^^
+  - name: InterpolatedSymbolNode
+    fields:
+      - name: opening_loc
+        type: location?
+      - name: parts
+        type: node[]
+        kind:
+          - StringNode
+          - EmbeddedStatementsNode
+          - EmbeddedVariableNode
+      - name: closing_loc
+        type: location?
+    newline: parts
+    comment: |
+      Represents a symbol literal that contains interpolation.
+
+          :"foo #{bar} baz"
+          ^^^^^^^^^^^^^^^^^
+  - name: InterpolatedXStringNode
+    fields:
+      - name: opening_loc
+        type: location
+      - name: parts
+        type: node[]
+        kind:
+          - StringNode
+          - EmbeddedStatementsNode
+          - EmbeddedVariableNode
+      - name: closing_loc
+        type: location
+    newline: parts
+    comment: |
+      Represents an xstring literal that contains interpolation.
+
+          `foo #{bar} baz`
+          ^^^^^^^^^^^^^^^^
+  - name: ItLocalVariableReadNode
+    comment: |
+      Represents reading from the implicit `it` local variable.
+
+          -> { it }
+               ^^
+  - name: ItParametersNode
+    comment: |
+      Represents an implicit set of parameters through the use of the `it` keyword within a block or lambda.
+
+          -> { it + it }
+          ^^^^^^^^^^^^^^
+  - name: KeywordHashNode
+    flags: KeywordHashNodeFlags
+    fields:
+      - name: elements
+        type: node[]
+        kind:
+          - AssocNode
+          - AssocSplatNode
+    comment: |
+      Represents a hash literal without opening and closing braces.
+
+          foo(a: b)
+              ^^^^
+  - name: KeywordRestParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant?
+      - name: name_loc
+        type: location?
+      - name: operator_loc
+        type: location
+    comment: |
+      Represents a keyword rest parameter to a method, block, or lambda definition.
+
+          def a(**b)
+                ^^^
+          end
+  - name: LambdaNode
+    fields:
+      - name: locals
+        type: constant[]
+      - name: operator_loc
+        type: location
+      - name: opening_loc
+        type: location
+      - name: closing_loc
+        type: location
+      - name: parameters
+        type: node?
+        kind:
+          - BlockParametersNode
+          - NumberedParametersNode
+          - ItParametersNode
+      - name: body
+        type: node?
+        kind:
+          - StatementsNode
+          - BeginNode
+    comment: |
+      Represents using a lambda literal (not the lambda method call).
+
+          ->(value) { value * 2 }
+          ^^^^^^^^^^^^^^^^^^^^^^^
+  - name: LocalVariableAndWriteNode
+    fields:
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: name
+        type: constant
+      - name: depth
+        type: uint32
+    comment: |
+      Represents the use of the `&&=` operator for assignment to a local variable.
+
+          target &&= value
+          ^^^^^^^^^^^^^^^^
+  - name: LocalVariableOperatorWriteNode
+    fields:
+      - name: name_loc
+        type: location
+      - name: binary_operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: name
+        type: constant
+      - name: binary_operator
+        type: constant
+      - name: depth
+        type: uint32
+    comment: |
+      Represents assigning to a local variable using an operator that isn't `=`.
+
+          target += value
+          ^^^^^^^^^^^^^^^
+  - name: LocalVariableOrWriteNode
+    fields:
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: name
+        type: constant
+      - name: depth
+        type: uint32
+    comment: |
+      Represents the use of the `||=` operator for assignment to a local variable.
+
+          target ||= value
+          ^^^^^^^^^^^^^^^^
+  - name: LocalVariableReadNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the local variable, which is an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              x      # name `:x`
+
+              _Test  # name `:_Test`
+
+          Note that this can also be an underscore followed by a number for the default block parameters.
+
+              _1     # name `:_1`
+
+      - name: depth
+        type: uint32
+        comment: |
+          The number of visible scopes that should be searched to find the origin of this local variable.
+
+              foo = 1; foo # depth 0
+
+              bar = 2; tap { bar } # depth 1
+
+          The specific rules for calculating the depth may differ between individual Ruby implementations, as they are not specified by the language. For more information, see [the Prism documentation](https://github.com/ruby/prism/blob/main/docs/local_variable_depth.md).
+    comment: |
+      Represents reading a local variable. Note that this requires that a local variable of the same name has already been written to in the same scope, otherwise it is parsed as a method call.
+
+          foo
+          ^^^
+  - name: LocalVariableTargetNode
+    fields:
+      - name: name
+        type: constant
+      - name: depth
+        type: uint32
+    comment: |
+      Represents writing to a local variable in a context that doesn't have an explicit value.
+
+          foo, bar = baz
+          ^^^  ^^^
+
+          foo => baz
+                 ^^^
+  - name: LocalVariableWriteNode
+    fields:
+      - name: name
+        type: constant
+        comment: |
+          The name of the local variable, which is an [identifier](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#identifiers).
+
+              foo = :bar # name `:foo`
+
+              abc = 123  # name `:abc`
+      - name: depth
+        type: uint32
+        comment: |
+          The number of semantic scopes we have to traverse to find the declaration of this variable.
+
+              foo = 1         # depth 0
+
+              tap { foo = 1 } # depth 1
+
+          The specific rules for calculating the depth may differ between individual Ruby implementations, as they are not specified by the language. For more information, see [the Prism documentation](https://github.com/ruby/prism/blob/main/docs/local_variable_depth.md).
+      - name: name_loc
+        type: location
+        comment: |
+          The location of the variable name.
+
+              foo = :bar
+              ^^^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the local variable. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              foo = :bar
+                    ^^^^
+
+              abc = 1234
+                    ^^^^
+
+          Note that since the name of a local variable is known before the value is parsed, it is valid for a local variable to appear within the value of its own write.
+
+              foo = foo
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `=` operator.
+
+              x = :y
+                ^
+    comment: |
+      Represents writing to a local variable.
+
+          foo = 1
+          ^^^^^^^
+  - name: MatchLastLineNode
+    flags: RegularExpressionFlags
+    fields:
+      - name: opening_loc
+        type: location
+      - name: content_loc
+        type: location
+      - name: closing_loc
+        type: location
+      - name: unescaped
+        type: string
+    comment: |
+      Represents a regular expression literal used in the predicate of a conditional to implicitly match against the last line read by an IO object.
+
+          if /foo/i then end
+             ^^^^^^
+  - name: MatchPredicateNode
+    fields:
+      - name: value
+        type: node
+        kind: non-void expression
+      - name: pattern
+        type: node
+        kind: pattern expression
+      - name: operator_loc
+        type: location
+    comment: |
+      Represents the use of the modifier `in` operator.
+
+          foo in bar
+          ^^^^^^^^^^
+  - name: MatchRequiredNode
+    fields:
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the left-hand side of the operator.
+
+              foo => bar
+              ^^^
+      - name: pattern
+        type: node
+        kind: pattern expression
+        comment: |
+          Represents the right-hand side of the operator. The type of the node depends on the expression.
+
+          Anything that looks like a local variable name (including `_`) will result in a `LocalVariableTargetNode`.
+
+              foo => a # This is equivalent to writing `a = foo`
+                     ^
+
+          Using an explicit `Array` or combining expressions with `,` will result in an `ArrayPatternNode`. This can be preceded by a constant.
+
+              foo => [a]
+                     ^^^
+
+              foo => a, b
+                     ^^^^
+
+              foo => Bar[a, b]
+                     ^^^^^^^^^
+
+          If the array pattern contains at least two wildcard matches, a `FindPatternNode` is created instead.
+
+              foo => *, 1, *a
+                     ^^^^^^^^
+
+          Using an explicit `Hash` or a constant with square brackets and hash keys in the square brackets will result in a `HashPatternNode`.
+
+              foo => { a: 1, b: }
+
+              foo => Bar[a: 1, b:]
+
+              foo => Bar[**]
+
+          To use any variable that needs run time evaluation, pinning is required. This results in a `PinnedVariableNode`.
+
+              foo => ^a
+                     ^^
+
+          Similarly, any expression can be used with pinning. This results in a `PinnedExpressionNode`.
+
+              foo => ^(a + 1)
+
+          Anything else will result in the regular node for that expression, for example a `ConstantReadNode`.
+
+              foo => CONST
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the operator.
+
+              foo => bar
+                  ^^
+    comment: |
+      Represents the use of the `=>` operator.
+
+          foo => bar
+          ^^^^^^^^^^
+  - name: MatchWriteNode
+    fields:
+      - name: call
+        type: node
+        kind: CallNode
+      - name: targets
+        type: node[]
+        kind: LocalVariableTargetNode
+    comment: |
+      Represents writing local variables using a regular expression match with named capture groups.
+
+          /(?<foo>bar)/ =~ baz
+          ^^^^^^^^^^^^^^^^^^^^
+  - name: MissingNode
+    comment: |
+      Represents a node that is missing from the source and results in a syntax error.
+  - name: ModuleNode
+    fields:
+      - name: locals
+        type: constant[]
+      - name: module_keyword_loc
+        type: location
+      - name: constant_path
+        type: node
+        kind:
+          - ConstantReadNode
+          - ConstantPathNode
+          - on error: MissingNode # module Parent module end
+      - name: body
+        type: node?
+        kind:
+          - StatementsNode
+          - BeginNode
+      - name: end_keyword_loc
+        type: location
+      - name: name
+        type: constant
+    comment: |
+      Represents a module declaration involving the `module` keyword.
+
+          module Foo end
+          ^^^^^^^^^^^^^^
+  - name: MultiTargetNode
+    fields:
+      - name: lefts
+        type: node[]
+        kind:
+          - LocalVariableTargetNode
+          - InstanceVariableTargetNode
+          - ClassVariableTargetNode
+          - GlobalVariableTargetNode
+          - ConstantTargetNode
+          - ConstantPathTargetNode
+          - CallTargetNode
+          - IndexTargetNode
+          - MultiTargetNode
+          - RequiredParameterNode # def m((a,b)); end
+          - on error: BackReferenceReadNode # a, (b, $&) = z
+          - on error: NumberedReferenceReadNode # a, (b, $1) = z
+        comment: |
+          Represents the target expressions before a splat node.
+
+              a, (b, c, *) = 1, 2, 3, 4, 5
+                  ^^^^
+
+          The splat node can be absent; in that case, all target expressions are in the `lefts` field.
+
+              a, (b, c) = 1, 2, 3, 4, 5
+                  ^^^^
+      - name: rest
+        type: node?
+        kind:
+          - ImplicitRestNode
+          - SplatNode
+        comment: |
+          Represents a splat node in the target expression.
+
+              a, (b, *c) = 1, 2, 3, 4
+                     ^^
+
+          The variable can be empty; this results in a `SplatNode` with a `nil` expression field.
+
+              a, (b, *) = 1, 2, 3, 4
+                     ^
+
+          If the `*` is omitted, this field will contain an `ImplicitRestNode`.
+
+              a, (b,) = 1, 2, 3, 4
+                   ^
+      - name: rights
+        type: node[]
+        kind:
+          - LocalVariableTargetNode
+          - InstanceVariableTargetNode
+          - ClassVariableTargetNode
+          - GlobalVariableTargetNode
+          - ConstantTargetNode
+          - ConstantPathTargetNode
+          - CallTargetNode
+          - IndexTargetNode
+          - MultiTargetNode
+          - RequiredParameterNode # def m((*,b)); end
+          - on error: BackReferenceReadNode # a, (*, $&) = z
+          - on error: NumberedReferenceReadNode # a, (*, $1) = z
+        comment: |
+          Represents the target expressions after a splat node.
+
+              a, (*, b, c) = 1, 2, 3, 4, 5
+                     ^^^^
+      - name: lparen_loc
+        type: location?
+        comment: |
+          The location of the opening parenthesis.
+
+              a, (b, c) = 1, 2, 3
+                 ^
+      - name: rparen_loc
+        type: location?
+        comment: |
+          The location of the closing parenthesis.
+
+              a, (b, c) = 1, 2, 3
+                      ^
+    comment: |
+      Represents a multi-target expression.
+
+          a, (b, c) = 1, 2, 3
+             ^^^^^^
+
+      This can be a part of `MultiWriteNode` as above, or the target of a `for` loop.
+
+          for a, b in [[1, 2], [3, 4]]
+              ^^^^
+  - name: MultiWriteNode
+    fields:
+      - name: lefts
+        type: node[]
+        kind:
+          - LocalVariableTargetNode
+          - InstanceVariableTargetNode
+          - ClassVariableTargetNode
+          - GlobalVariableTargetNode
+          - ConstantTargetNode
+          - ConstantPathTargetNode
+          - CallTargetNode
+          - IndexTargetNode
+          - MultiTargetNode
+          - on error: BackReferenceReadNode # $&, = z
+          - on error: NumberedReferenceReadNode # $1, = z
+        comment: |
+          Represents the target expressions before a splat node.
+
+              a, b, * = 1, 2, 3, 4, 5
+              ^^^^
+
+          The splat node can be absent; in that case, all target expressions are in the `lefts` field.
+
+              a, b, c = 1, 2, 3, 4, 5
+              ^^^^^^^
+      - name: rest
+        type: node?
+        kind:
+          - ImplicitRestNode
+          - SplatNode
+        comment: |
+          Represents a splat node in the target expression.
+
+              a, b, *c = 1, 2, 3, 4
+                    ^^
+
+          The variable can be empty; this results in a `SplatNode` with a `nil` expression field.
+
+              a, b, * = 1, 2, 3, 4
+                    ^
+
+          If the `*` is omitted, this field will contain an `ImplicitRestNode`.
+
+              a, b, = 1, 2, 3, 4
+                  ^
+      - name: rights
+        type: node[]
+        kind:
+          - LocalVariableTargetNode
+          - InstanceVariableTargetNode
+          - ClassVariableTargetNode
+          - GlobalVariableTargetNode
+          - ConstantTargetNode
+          - ConstantPathTargetNode
+          - CallTargetNode
+          - IndexTargetNode
+          - MultiTargetNode
+          - on error: BackReferenceReadNode # *, $& = z
+          - on error: NumberedReferenceReadNode # *, $1 = z
+        comment: |
+          Represents the target expressions after a splat node.
+
+              a, *, b, c = 1, 2, 3, 4, 5
+                    ^^^^
+      - name: lparen_loc
+        type: location?
+        comment: |
+          The location of the opening parenthesis.
+
+              (a, b, c) = 1, 2, 3
+              ^
+      - name: rparen_loc
+        type: location?
+        comment: |
+          The location of the closing parenthesis.
+
+              (a, b, c) = 1, 2, 3
+                      ^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the operator.
+
+              a, b, c = 1, 2, 3
+                      ^
+      - name: value
+        type: node
+        kind: non-void expression
+        comment: |
+          The value to write to the targets. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              a, b, c = 1, 2, 3
+                        ^^^^^^^
+    comment: |
+      Represents a write to a multi-target expression.
+
+          a, b, c = 1, 2, 3
+          ^^^^^^^^^^^^^^^^^
+  - name: NextNode
+    fields:
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+      - name: keyword_loc
+        type: location
+    comment: |
+      Represents the use of the `next` keyword.
+
+          next 1
+          ^^^^^^
+  - name: NilNode
+    comment: |
+      Represents the use of the `nil` keyword.
+
+          nil
+          ^^^
+  - name: NoKeywordsParameterNode
+    fields:
+      - name: operator_loc
+        type: location
+      - name: keyword_loc
+        type: location
+    comment: |
+      Represents the use of `**nil` inside method arguments.
+
+          def a(**nil)
+                ^^^^^
+          end
+  - name: NumberedParametersNode
+    fields:
+      - name: maximum
+        type: uint8
+    comment: |
+      Represents an implicit set of parameters through the use of numbered parameters within a block or lambda.
+
+          -> { _1 + _2 }
+          ^^^^^^^^^^^^^^
+  - name: NumberedReferenceReadNode
+    fields:
+      - name: number
+        type: uint32
+        comment: |
+          The (1-indexed, from the left) number of the capture group. Numbered references that are too large result in this value being `0`.
+
+              $1          # number `1`
+
+              $5432       # number `5432`
+
+              $4294967296 # number `0`
+    comment: |
+      Represents reading a numbered reference to a capture in the previous match.
+
+          $1
+          ^^
+  - name: OptionalKeywordParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents an optional keyword parameter to a method, block, or lambda definition.
+
+          def a(b: 1)
+                ^^^^
+          end
+  - name: OptionalParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: value
+        type: node
+        kind: non-void expression
+    comment: |
+      Represents an optional parameter to a method, block, or lambda definition.
+
+          def a(b = 1)
+                ^^^^^
+          end
+  - name: OrNode
+    fields:
+      - name: left
+        type: node
+        kind: non-void expression
+        comment: |
+          Represents the left side of the expression. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              left or right
+              ^^^^
+
+              1 || 2
+              ^
+      - name: right
+        type: node
+        kind: Node
+        comment: |
+          Represents the right side of the expression.
+
+              left || right
+                      ^^^^^
+
+              1 or 2
+                   ^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `or` keyword or the `||` operator.
+
+              left or right
+                   ^^
+    comment: |
+      Represents the use of the `||` operator or the `or` keyword.
+
+          left or right
+          ^^^^^^^^^^^^^
+  - name: ParametersNode
+    fields:
+      - name: requireds
+        type: node[]
+        kind:
+          - RequiredParameterNode
+          - MultiTargetNode
+      - name: optionals
+        type: node[]
+        kind: OptionalParameterNode
+      - name: rest
+        type: node?
+        kind:
+          - RestParameterNode
+          - ImplicitRestNode # Only in block parameters
+      - name: posts
+        type: node[]
+        kind:
+          - RequiredParameterNode
+          - MultiTargetNode
+          # On parsing error of `f(**kwargs, ...)` or `f(**nil, ...)`, the keyword_rest value is moved here:
+          - on error: KeywordRestParameterNode
+          - on error: NoKeywordsParameterNode
+          # On parsing error of `f(..., ...)`, the first forwarding parameter is moved here:
+          - on error: ForwardingParameterNode
+      - name: keywords
+        type: node[]
+        kind:
+          - RequiredKeywordParameterNode
+          - OptionalKeywordParameterNode
+      - name: keyword_rest
+        type: node?
+        kind:
+          - KeywordRestParameterNode
+          - ForwardingParameterNode
+          - NoKeywordsParameterNode
+      - name: block
+        type: node?
+        kind: BlockParameterNode
+    comment: |
+      Represents the list of parameters on a method, block, or lambda definition.
+
+          def a(b, c, d)
+                ^^^^^^^
+          end
+  - name: ParenthesesNode
+    flags: ParenthesesNodeFlags
+    fields:
+      - name: body
+        type: node?
+        kind: non-void expression # Usually a StatementsNode but not always e.g. `1 in (..10)`
+      - name: opening_loc
+        type: location
+      - name: closing_loc
+        type: location
+    newline: false
+    comment: |
+      Represents a parenthesized expression.
+
+          (10 + 34)
+          ^^^^^^^^^
+  - name: PinnedExpressionNode
+    fields:
+      - name: expression
+        type: node
+        kind: non-void expression
+        comment: |
+          The expression used in the pinned expression
+
+              foo in ^(bar)
+                       ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `^` operator
+
+              foo in ^(bar)
+                     ^
+      - name: lparen_loc
+        type: location
+        comment: |
+          The location of the opening parenthesis.
+
+              foo in ^(bar)
+                      ^
+      - name: rparen_loc
+        type: location
+        comment: |
+          The location of the closing parenthesis.
+
+              foo in ^(bar)
+                          ^
+    comment: |
+      Represents the use of the `^` operator for pinning an expression in a pattern matching expression.
+
+          foo in ^(bar)
+                 ^^^^^^
+  - name: PinnedVariableNode
+    fields:
+      - name: variable
+        type: node
+        kind:
+          - LocalVariableReadNode
+          - InstanceVariableReadNode
+          - ClassVariableReadNode
+          - GlobalVariableReadNode # foo in ^$a
+          - BackReferenceReadNode # foo in ^$&
+          - NumberedReferenceReadNode # foo in ^$1
+          - ItLocalVariableReadNode # proc { 1 in ^it }
+          - on error: MissingNode # foo in ^Bar
+        comment: |
+          The variable used in the pinned expression
+
+              foo in ^bar
+                      ^^^
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `^` operator
+
+              foo in ^bar
+                     ^
+    comment: |
+      Represents the use of the `^` operator for pinning a variable in a pattern matching expression.
+
+          foo in ^bar
+                 ^^^^
+  - name: PostExecutionNode
+    fields:
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: keyword_loc
+        type: location
+      - name: opening_loc
+        type: location
+      - name: closing_loc
+        type: location
+    comment: |
+      Represents the use of the `END` keyword.
+
+          END { foo }
+          ^^^^^^^^^^^
+  - name: PreExecutionNode
+    fields:
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: keyword_loc
+        type: location
+      - name: opening_loc
+        type: location
+      - name: closing_loc
+        type: location
+    comment: |
+      Represents the use of the `BEGIN` keyword.
+
+          BEGIN { foo }
+          ^^^^^^^^^^^^^
+  - name: ProgramNode
+    fields:
+      - name: locals
+        type: constant[]
+      - name: statements
+        type: node
+        kind: StatementsNode
+    comment: The top level node of any parse tree.
+  - name: RangeNode
+    flags: RangeFlags
+    fields:
+      - name: left
+        type: node?
+        kind: non-void expression
+        comment: |
+          The left-hand side of the range, if present. It can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              1...
+              ^
+
+              hello...goodbye
+              ^^^^^
+      - name: right
+        type: node?
+        kind: non-void expression
+        comment: |
+          The right-hand side of the range, if present. It can be either `nil` or any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              ..5
+                ^
+
+              1...foo
+                  ^^^
+          If neither the right-hand nor the left-hand side was included, this will be a MissingNode.
+      - name: operator_loc
+        type: location
+        comment: |
+          The location of the `..` or `...` operator.
+    comment: |
+      Represents the use of the `..` or `...` operators.
+
+          1..2
+          ^^^^
+
+          c if a =~ /left/ ... b =~ /right/
+               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  - name: RationalNode
+    flags: IntegerBaseFlags
+    fields:
+      - name: numerator
+        type: integer
+        comment: |
+          The numerator of the rational number.
+
+              1.5r # numerator 3
+      - name: denominator
+        type: integer
+        comment: |
+          The denominator of the rational number.
+
+              1.5r # denominator 2
+    comment: |
+      Represents a rational number literal.
+
+          1.0r
+          ^^^^
+  - name: RedoNode
+    comment: |
+      Represents the use of the `redo` keyword.
+
+          redo
+          ^^^^
+  - name: RegularExpressionNode
+    flags: RegularExpressionFlags
+    fields:
+      - name: opening_loc
+        type: location
+      - name: content_loc
+        type: location
+      - name: closing_loc
+        type: location
+      - name: unescaped
+        type: string
+    comment: |
+      Represents a regular expression literal with no interpolation.
+
+          /foo/i
+          ^^^^^^
+  - name: RequiredKeywordParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant
+      - name: name_loc
+        type: location
+    comment: |
+      Represents a required keyword parameter to a method, block, or lambda definition.
+
+          def a(b: )
+                ^^
+          end
+  - name: RequiredParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant
+    comment: |
+      Represents a required parameter to a method, block, or lambda definition.
+
+          def a(b)
+                ^
+          end
+  - name: RescueModifierNode
+    fields:
+      - name: expression
+        type: node
+        kind: Node
+      - name: keyword_loc
+        type: location
+      - name: rescue_expression
+        type: node
+        kind: Node
+    newline: expression
+    comment: |
+      Represents an expression modified with a rescue.
+
+          foo rescue nil
+          ^^^^^^^^^^^^^^
+  - name: RescueNode
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: exceptions
+        type: node[]
+        kind: non-void expression
+      - name: operator_loc
+        type: location?
+      - name: reference
+        type: node?
+        kind:
+          - LocalVariableTargetNode
+          - InstanceVariableTargetNode
+          - ClassVariableTargetNode
+          - GlobalVariableTargetNode
+          - ConstantTargetNode
+          - ConstantPathTargetNode
+          - CallTargetNode
+          - IndexTargetNode
+          - on error: BackReferenceReadNode # => begin; rescue => $&; end
+          - on error: NumberedReferenceReadNode # => begin; rescue => $1; end
+          - on error: MissingNode # begin; rescue =>; end
+      - name: then_keyword_loc
+        type: location?
+      - name: statements
+        type: node?
+        kind: StatementsNode
+      - name: subsequent
+        type: node?
+        kind: RescueNode
+    comment: |
+      Represents a rescue statement.
+
+          begin
+          rescue Foo, *splat, Bar => ex
+            foo
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+          end
+
+      `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
+  - name: RestParameterNode
+    flags: ParameterFlags
+    fields:
+      - name: name
+        type: constant?
+      - name: name_loc
+        type: location?
+      - name: operator_loc
+        type: location
+    comment: |
+      Represents a rest parameter to a method, block, or lambda definition.
+
+          def a(*b)
+                ^^
+          end
+  - name: RetryNode
+    comment: |
+      Represents the use of the `retry` keyword.
+
+          retry
+          ^^^^^
+  - name: ReturnNode
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+    comment: |
+      Represents the use of the `return` keyword.
+
+          return 1
+          ^^^^^^^^
+  - name: SelfNode
+    comment: |
+      Represents the `self` keyword.
+
+          self
+          ^^^^
+  - name: ShareableConstantNode
+    flags: ShareableConstantNodeFlags
+    fields:
+      - name: write
+        type: node
+        kind:
+          - ConstantWriteNode
+          - ConstantAndWriteNode
+          - ConstantOrWriteNode
+          - ConstantOperatorWriteNode
+          - ConstantPathWriteNode
+          - ConstantPathAndWriteNode
+          - ConstantPathOrWriteNode
+          - ConstantPathOperatorWriteNode
+        comment: The constant write that should be modified with the shareability state.
+    comment: |
+      This node wraps a constant write to indicate that when the value is written, it should have its shareability state modified.
+
+          # shareable_constant_value: literal
+          C = { a: 1 }
+          ^^^^^^^^^^^^
+  - name: SingletonClassNode
+    fields:
+      - name: locals
+        type: constant[]
+      - name: class_keyword_loc
+        type: location
+      - name: operator_loc
+        type: location
+      - name: expression
+        type: node
+        kind: non-void expression
+      - name: body
+        type: node?
+        kind:
+          - StatementsNode
+          - BeginNode
+      - name: end_keyword_loc
+        type: location
+    comment: |
+      Represents a singleton class declaration involving the `class` keyword.
+
+          class << self end
+          ^^^^^^^^^^^^^^^^^
+  - name: SourceEncodingNode
+    comment: |
+      Represents the use of the `__ENCODING__` keyword.
+
+          __ENCODING__
+          ^^^^^^^^^^^^
+  - name: SourceFileNode
+    flags: StringFlags
+    fields:
+      - name: filepath
+        type: string
+        comment: Represents the file path being parsed. This corresponds directly to the `filepath` option given to the various `Prism::parse*` APIs.
+    comment: |
+      Represents the use of the `__FILE__` keyword.
+
+          __FILE__
+          ^^^^^^^^
+  - name: SourceLineNode
+    comment: |
+      Represents the use of the `__LINE__` keyword.
+
+          __LINE__
+          ^^^^^^^^
+  - name: SplatNode
+    fields:
+      - name: operator_loc
+        type: location
+      - name: expression
+        type: node?
+        kind: non-void expression
+    comment: |
+      Represents the use of the splat operator.
+
+          [*a]
+           ^^
+  - name: StatementsNode
+    fields:
+      - name: body
+        type: node[]
+        kind: Node
+    comment: |
+      Represents a set of statements contained within some scope.
+
+          foo; bar; baz
+          ^^^^^^^^^^^^^
+  - name: StringNode
+    flags: StringFlags
+    fields:
+      - name: opening_loc
+        type: location?
+      - name: content_loc
+        type: location
+      - name: closing_loc
+        type: location?
+      - name: unescaped
+        type: string
+    comment: |
+      Represents a string literal, a string contained within a `%w` list, or plain string content within an interpolated string.
+
+          "foo"
+          ^^^^^
+
+          %w[foo]
+             ^^^
+
+          "foo #{bar} baz"
+           ^^^^      ^^^^
+  - name: SuperNode
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: lparen_loc
+        type: location?
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+        comment: "Can only be `nil` when there are empty parentheses, like `super()`."
+      - name: rparen_loc
+        type: location?
+      - name: block
+        type: node?
+        kind:
+          - BlockNode
+          - BlockArgumentNode
+    comment: |
+      Represents the use of the `super` keyword with parentheses or arguments.
+
+          super()
+          ^^^^^^^
+
+          super foo, bar
+          ^^^^^^^^^^^^^^
+
+      If no arguments are provided (except for a block), it would be a `ForwardingSuperNode` instead.
+  - name: SymbolNode
+    flags: SymbolFlags
+    fields:
+      - name: opening_loc
+        type: location?
+      - name: value_loc
+        type: location?
+      - name: closing_loc
+        type: location?
+      - name: unescaped
+        type: string
+    comment: |
+      Represents a symbol literal or a symbol contained within a `%i` list.
+
+          :foo
+          ^^^^
+
+          %i[foo]
+             ^^^
+  - name: TrueNode
+    comment: |
+      Represents the use of the literal `true` keyword.
+
+          true
+          ^^^^
+  - name: UndefNode
+    fields:
+      - name: names
+        type: node[]
+        kind:
+          - SymbolNode
+          - InterpolatedSymbolNode
+      - name: keyword_loc
+        type: location
+    comment: |
+      Represents the use of the `undef` keyword.
+
+          undef :foo, :bar, :baz
+          ^^^^^^^^^^^^^^^^^^^^^^
+  - name: UnlessNode
+    fields:
+      - name: keyword_loc
+        type: location
+        comment: |
+          The location of the `unless` keyword.
+
+              unless cond then bar end
+              ^^^^^^
+
+              bar unless cond
+                  ^^^^^^
+      - name: predicate
+        type: node
+        kind: non-void expression
+        comment: |
+          The condition to be evaluated for the unless expression. It can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
+
+              unless cond then bar end
+                     ^^^^
+
+              bar unless cond
+                         ^^^^
+      - name: then_keyword_loc
+        type: location?
+        comment: |
+          The location of the `then` keyword, if present.
+
+              unless cond then bar end
+                          ^^^^
+      - name: statements
+        type: node?
+        kind: StatementsNode
+        comment: |
+          The body of statements that will be executed if the unless condition is
+          falsey. Will be `nil` if no body is provided.
+
+              unless cond then bar end
+                               ^^^
+      - name: else_clause
+        type: node?
+        kind: ElseNode
+        comment: |
+          The else clause of the unless expression, if present.
+
+              unless cond then bar else baz end
+                                   ^^^^^^^^
+      - name: end_keyword_loc
+        type: location?
+        comment: |
+          The location of the `end` keyword, if present.
+
+              unless cond then bar end
+                                   ^^^
+    newline: predicate
+    comment: |
+      Represents the use of the `unless` keyword, either in the block form or the modifier form.
+
+          bar unless foo
+          ^^^^^^^^^^^^^^
+
+          unless foo then bar end
+          ^^^^^^^^^^^^^^^^^^^^^^^
+  - name: UntilNode
+    flags: LoopFlags
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: do_keyword_loc
+        type: location?
+      - name: closing_loc
+        type: location?
+      - name: predicate
+        type: node
+        kind: non-void expression
+      - name: statements
+        type: node?
+        kind: StatementsNode
+    newline: predicate
+    comment: |
+      Represents the use of the `until` keyword, either in the block form or the modifier form.
+
+          bar until foo
+          ^^^^^^^^^^^^^
+
+          until foo do bar end
+          ^^^^^^^^^^^^^^^^^^^^
+  - name: WhenNode
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: conditions
+        type: node[]
+        kind: non-void expression
+      - name: then_keyword_loc
+        type: location?
+      - name: statements
+        type: node?
+        kind: StatementsNode
+    comment: |
+      Represents the use of the `when` keyword within a case statement.
+
+          case true
+          when true
+          ^^^^^^^^^
+          end
+  - name: WhileNode
+    flags: LoopFlags
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: do_keyword_loc
+        type: location?
+      - name: closing_loc
+        type: location?
+      - name: predicate
+        type: node
+        kind: non-void expression
+      - name: statements
+        type: node?
+        kind: StatementsNode
+    newline: predicate
+    comment: |
+      Represents the use of the `while` keyword, either in the block form or the modifier form.
+
+          bar while foo
+          ^^^^^^^^^^^^^
+
+          while foo do bar end
+          ^^^^^^^^^^^^^^^^^^^^
+  - name: XStringNode
+    flags: EncodingFlags
+    fields:
+      - name: opening_loc
+        type: location
+      - name: content_loc
+        type: location
+      - name: closing_loc
+        type: location
+      - name: unescaped
+        type: string
+    comment: |
+      Represents an xstring literal with no interpolation.
+
+          `foo`
+          ^^^^^
+  - name: YieldNode
+    fields:
+      - name: keyword_loc
+        type: location
+      - name: lparen_loc
+        type: location?
+      - name: arguments
+        type: node?
+        kind: ArgumentsNode
+      - name: rparen_loc
+        type: location?
+    comment: |
+      Represents the use of the `yield` keyword.
+
+          yield 1
+          ^^^^^^^
diff --git a/prism/defines.h b/prism/defines.h
new file mode 100644
index 0000000000..e31429c789
--- /dev/null
+++ b/prism/defines.h
@@ -0,0 +1,260 @@
+/**
+ * @file defines.h
+ *
+ * Macro definitions used throughout the prism library.
+ *
+ * This file should be included first by any *.h or *.c in prism for consistency
+ * and to ensure that the macros are defined before they are used.
+ */
+#ifndef PRISM_DEFINES_H
+#define PRISM_DEFINES_H
+
+#include <ctype.h>
+#include <limits.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * We want to be able to use the PRI* macros for printing out integers, but on
+ * some platforms they aren't included unless this is already defined.
+ */
+#define __STDC_FORMAT_MACROS
+// Include sys/types.h before inttypes.h to work around issue with
+// certain versions of GCC and newlib which causes omission of PRIx64
+#include <sys/types.h>
+#include <inttypes.h>
+
+/**
+ * When we are parsing using recursive descent, we want to protect against
+ * malicious payloads that could attempt to crash our parser. We do this by
+ * specifying a maximum depth to which we are allowed to recurse.
+ */
+#ifndef PRISM_DEPTH_MAXIMUM
+    #define PRISM_DEPTH_MAXIMUM 10000
+#endif
+
+/**
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
+ * need to mark certain functions as being publicly visible. This macro does
+ * that in a compiler-agnostic way.
+ */
+#ifndef PRISM_EXPORTED_FUNCTION
+#   ifdef PRISM_EXPORT_SYMBOLS
+#       ifdef _WIN32
+#          define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
+#       else
+#          define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
+#       endif
+#   else
+#       define PRISM_EXPORTED_FUNCTION
+#   endif
+#endif
+
+/**
+ * Certain compilers support specifying that a function accepts variadic
+ * parameters that look like printf format strings to provide a better developer
+ * experience when someone is using the function. This macro does that in a
+ * compiler-agnostic way.
+ */
+#if defined(__GNUC__)
+#   if defined(__MINGW_PRINTF_FORMAT)
+#       define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index)))
+#   else
+#       define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
+#   endif
+#elif defined(__clang__)
+#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
+#else
+#   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
+#endif
+
+/**
+ * GCC will warn if you specify a function or parameter that is unused at
+ * runtime. This macro allows you to mark a function or parameter as unused in a
+ * compiler-agnostic way.
+ */
+#if defined(__GNUC__)
+#   define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#   define PRISM_ATTRIBUTE_UNUSED
+#endif
+
+/**
+ * Old Visual Studio versions do not support the inline keyword, so we need to
+ * define it to be __inline.
+ */
+#if defined(_MSC_VER) && !defined(inline)
+#   define inline __inline
+#endif
+
+/**
+ * Old Visual Studio versions before 2015 do not implement snprintf, but
+ * instead implement _snprintf. We standardize on the snprintf name here.
+ */
+#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
+#   define snprintf _snprintf
+#endif
+
+/**
+ * A simple utility macro to concatenate two tokens together, necessary when one
+ * of the tokens is itself a macro.
+ */
+#define PM_CONCATENATE(left, right) left ## right
+
+/**
+ * We want to be able to use static assertions, but they weren't standardized
+ * until C11. On C11 and later we use the _Static_assert keyword; otherwise we
+ * polyfill with a typedef whose array size is negative if the condition is false.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
+#else
+#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
+#endif
+
+/**
+ * In general, libc for embedded systems does not support memory-mapped files.
+ * If the target platform is POSIX or Windows, we can map a file in memory and
+ * read it in a more efficient manner.
+ */
+#ifdef _WIN32
+#   define PRISM_HAS_MMAP
+#else
+#   include <unistd.h>
+#   ifdef _POSIX_MAPPED_FILES
+#       define PRISM_HAS_MMAP
+#   endif
+#endif
+
+/**
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
+ * related code from the library. All filesystem related code should be guarded
+ * by PRISM_HAS_FILESYSTEM.
+ */
+#ifndef PRISM_HAS_NO_FILESYSTEM
+#   define PRISM_HAS_FILESYSTEM
+#endif
+
+/**
+ * isinf on POSIX systems accepts a float, a double, or a long double. But
+ * mingw didn't provide an isinf macro, only an isinf function that only
+ * accepts floats, so we need to use _finite instead.
+ */
+#ifdef __MINGW64__
+    #include <float.h>
+    #define PRISM_ISINF(x) (!_finite(x))
+#else
+    #define PRISM_ISINF(x) isinf(x)
+#endif
+
+/**
+ * If you build prism with a custom allocator, configure it with
+ * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
+ * xrealloc, xcalloc, and xfree.
+ *
+ * For example, your `prism_xallocator.h` file could look like this:
+ *
+ * ```
+ * #ifndef PRISM_XALLOCATOR_H
+ * #define PRISM_XALLOCATOR_H
+ * #define xmalloc      my_malloc
+ * #define xrealloc     my_realloc
+ * #define xcalloc      my_calloc
+ * #define xfree        my_free
+ * #endif
+ * ```
+ */
+#ifdef PRISM_XALLOCATOR
+    #include "prism_xallocator.h"
+#else
+    #ifndef xmalloc
+        /**
+         * The malloc function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define.
+         */
+        #define xmalloc malloc
+    #endif
+
+    #ifndef xrealloc
+        /**
+         * The realloc function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define.
+         */
+        #define xrealloc realloc
+    #endif
+
+    #ifndef xcalloc
+        /**
+         * The calloc function that should be used. This can be overridden with
+         * the PRISM_XALLOCATOR define.
+         */
+        #define xcalloc calloc
+    #endif
+
+    #ifndef xfree
+        /**
+         * The free function that should be used. This can be overridden with the
+         * PRISM_XALLOCATOR define.
+         */
+        #define xfree free
+    #endif
+#endif
+
+/**
+ * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
+ * switch that will turn off certain features of prism.
+ */
+#ifdef PRISM_BUILD_MINIMAL
+    /** Exclude the serialization API. */
+    #define PRISM_EXCLUDE_SERIALIZATION
+
+    /** Exclude the JSON serialization API. */
+    #define PRISM_EXCLUDE_JSON
+
+    /** Exclude the Array#pack parser API. */
+    #define PRISM_EXCLUDE_PACK
+
+    /** Exclude the prettyprint API. */
+    #define PRISM_EXCLUDE_PRETTYPRINT
+
+    /** Exclude the full set of encodings, using the minimal only. */
+    #define PRISM_ENCODING_EXCLUDE_FULL
+#endif
+
+/**
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
+ * branch prediction.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+    /** The compiler should predict that this branch will be taken. */
+    #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
+
+    /** The compiler should predict that this branch will not be taken. */
+    #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+    /** Void because this platform does not support branch prediction hints. */
+    #define PRISM_LIKELY(x)   (x)
+
+    /** Void because this platform does not support branch prediction hints. */
+    #define PRISM_UNLIKELY(x) (x)
+#endif
+
+/**
+ * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
+ * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
+    #define PRISM_FALLTHROUGH [[fallthrough]];
+#elif defined(__GNUC__) || defined(__clang__)
+    #define PRISM_FALLTHROUGH __attribute__((fallthrough));
+#elif defined(_MSC_VER)
+    #define PRISM_FALLTHROUGH __fallthrough;
+#else
+    #define PRISM_FALLTHROUGH
+#endif
+
+#endif
diff --git a/prism/encoding.c b/prism/encoding.c
new file mode 100644
index 0000000000..d7e5616840
--- /dev/null
+++ b/prism/encoding.c
@@ -0,0 +1,5340 @@
+#include "prism/encoding.h"
+
+typedef uint32_t pm_unicode_codepoint_t;
+
+#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1508
+static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
+    0x100, 0x2C1,
+    0x2C6, 0x2D1,
+    0x2E0, 0x2E4,
+    0x2EC, 0x2EC,
+    0x2EE, 0x2EE,
+    0x345, 0x345,
+    0x363, 0x374,
+    0x376, 0x377,
+    0x37A, 0x37D,
+    0x37F, 0x37F,
+    0x386, 0x386,
+    0x388, 0x38A,
+    0x38C, 0x38C,
+    0x38E, 0x3A1,
+    0x3A3, 0x3F5,
+    0x3F7, 0x481,
+    0x48A, 0x52F,
+    0x531, 0x556,
+    0x559, 0x559,
+    0x560, 0x588,
+    0x5B0, 0x5BD,
+    0x5BF, 0x5BF,
+    0x5C1, 0x5C2,
+    0x5C4, 0x5C5,
+    0x5C7, 0x5C7,
+    0x5D0, 0x5EA,
+    0x5EF, 0x5F2,
+    0x610, 0x61A,
+    0x620, 0x657,
+    0x659, 0x65F,
+    0x66E, 0x6D3,
+    0x6D5, 0x6DC,
+    0x6E1, 0x6E8,
+    0x6ED, 0x6EF,
+    0x6FA, 0x6FC,
+    0x6FF, 0x6FF,
+    0x710, 0x73F,
+    0x74D, 0x7B1,
+    0x7CA, 0x7EA,
+    0x7F4, 0x7F5,
+    0x7FA, 0x7FA,
+    0x800, 0x817,
+    0x81A, 0x82C,
+    0x840, 0x858,
+    0x860, 0x86A,
+    0x870, 0x887,
+    0x889, 0x88F,
+    0x897, 0x897,
+    0x8A0, 0x8C9,
+    0x8D4, 0x8DF,
+    0x8E3, 0x8E9,
+    0x8F0, 0x93B,
+    0x93D, 0x94C,
+    0x94E, 0x950,
+    0x955, 0x963,
+    0x971, 0x983,
+    0x985, 0x98C,
+    0x98F, 0x990,
+    0x993, 0x9A8,
+    0x9AA, 0x9B0,
+    0x9B2, 0x9B2,
+    0x9B6, 0x9B9,
+    0x9BD, 0x9C4,
+    0x9C7, 0x9C8,
+    0x9CB, 0x9CC,
+    0x9CE, 0x9CE,
+    0x9D7, 0x9D7,
+    0x9DC, 0x9DD,
+    0x9DF, 0x9E3,
+    0x9F0, 0x9F1,
+    0x9FC, 0x9FC,
+    0xA01, 0xA03,
+    0xA05, 0xA0A,
+    0xA0F, 0xA10,
+    0xA13, 0xA28,
+    0xA2A, 0xA30,
+    0xA32, 0xA33,
+    0xA35, 0xA36,
+    0xA38, 0xA39,
+    0xA3E, 0xA42,
+    0xA47, 0xA48,
+    0xA4B, 0xA4C,
+    0xA51, 0xA51,
+    0xA59, 0xA5C,
+    0xA5E, 0xA5E,
+    0xA70, 0xA75,
+    0xA81, 0xA83,
+    0xA85, 0xA8D,
+    0xA8F, 0xA91,
+    0xA93, 0xAA8,
+    0xAAA, 0xAB0,
+    0xAB2, 0xAB3,
+    0xAB5, 0xAB9,
+    0xABD, 0xAC5,
+    0xAC7, 0xAC9,
+    0xACB, 0xACC,
+    0xAD0, 0xAD0,
+    0xAE0, 0xAE3,
+    0xAF9, 0xAFC,
+    0xB01, 0xB03,
+    0xB05, 0xB0C,
+    0xB0F, 0xB10,
+    0xB13, 0xB28,
+    0xB2A, 0xB30,
+    0xB32, 0xB33,
+    0xB35, 0xB39,
+    0xB3D, 0xB44,
+    0xB47, 0xB48,
+    0xB4B, 0xB4C,
+    0xB56, 0xB57,
+    0xB5C, 0xB5D,
+    0xB5F, 0xB63,
+    0xB71, 0xB71,
+    0xB82, 0xB83,
+    0xB85, 0xB8A,
+    0xB8E, 0xB90,
+    0xB92, 0xB95,
+    0xB99, 0xB9A,
+    0xB9C, 0xB9C,
+    0xB9E, 0xB9F,
+    0xBA3, 0xBA4,
+    0xBA8, 0xBAA,
+    0xBAE, 0xBB9,
+    0xBBE, 0xBC2,
+    0xBC6, 0xBC8,
+    0xBCA, 0xBCC,
+    0xBD0, 0xBD0,
+    0xBD7, 0xBD7,
+    0xC00, 0xC0C,
+    0xC0E, 0xC10,
+    0xC12, 0xC28,
+    0xC2A, 0xC39,
+    0xC3D, 0xC44,
+    0xC46, 0xC48,
+    0xC4A, 0xC4C,
+    0xC55, 0xC56,
+    0xC58, 0xC5A,
+    0xC5C, 0xC5D,
+    0xC60, 0xC63,
+    0xC80, 0xC83,
+    0xC85, 0xC8C,
+    0xC8E, 0xC90,
+    0xC92, 0xCA8,
+    0xCAA, 0xCB3,
+    0xCB5, 0xCB9,
+    0xCBD, 0xCC4,
+    0xCC6, 0xCC8,
+    0xCCA, 0xCCC,
+    0xCD5, 0xCD6,
+    0xCDC, 0xCDE,
+    0xCE0, 0xCE3,
+    0xCF1, 0xCF3,
+    0xD00, 0xD0C,
+    0xD0E, 0xD10,
+    0xD12, 0xD3A,
+    0xD3D, 0xD44,
+    0xD46, 0xD48,
+    0xD4A, 0xD4C,
+    0xD4E, 0xD4E,
+    0xD54, 0xD57,
+    0xD5F, 0xD63,
+    0xD7A, 0xD7F,
+    0xD81, 0xD83,
+    0xD85, 0xD96,
+    0xD9A, 0xDB1,
+    0xDB3, 0xDBB,
+    0xDBD, 0xDBD,
+    0xDC0, 0xDC6,
+    0xDCF, 0xDD4,
+    0xDD6, 0xDD6,
+    0xDD8, 0xDDF,
+    0xDF2, 0xDF3,
+    0xE01, 0xE3A,
+    0xE40, 0xE46,
+    0xE4D, 0xE4D,
+    0xE81, 0xE82,
+    0xE84, 0xE84,
+    0xE86, 0xE8A,
+    0xE8C, 0xEA3,
+    0xEA5, 0xEA5,
+    0xEA7, 0xEB9,
+    0xEBB, 0xEBD,
+    0xEC0, 0xEC4,
+    0xEC6, 0xEC6,
+    0xECD, 0xECD,
+    0xEDC, 0xEDF,
+    0xF00, 0xF00,
+    0xF40, 0xF47,
+    0xF49, 0xF6C,
+    0xF71, 0xF83,
+    0xF88, 0xF97,
+    0xF99, 0xFBC,
+    0x1000, 0x1036,
+    0x1038, 0x1038,
+    0x103B, 0x103F,
+    0x1050, 0x108F,
+    0x109A, 0x109D,
+    0x10A0, 0x10C5,
+    0x10C7, 0x10C7,
+    0x10CD, 0x10CD,
+    0x10D0, 0x10FA,
+    0x10FC, 0x1248,
+    0x124A, 0x124D,
+    0x1250, 0x1256,
+    0x1258, 0x1258,
+    0x125A, 0x125D,
+    0x1260, 0x1288,
+    0x128A, 0x128D,
+    0x1290, 0x12B0,
+    0x12B2, 0x12B5,
+    0x12B8, 0x12BE,
+    0x12C0, 0x12C0,
+    0x12C2, 0x12C5,
+    0x12C8, 0x12D6,
+    0x12D8, 0x1310,
+    0x1312, 0x1315,
+    0x1318, 0x135A,
+    0x1380, 0x138F,
+    0x13A0, 0x13F5,
+    0x13F8, 0x13FD,
+    0x1401, 0x166C,
+    0x166F, 0x167F,
+    0x1681, 0x169A,
+    0x16A0, 0x16EA,
+    0x16EE, 0x16F8,
+    0x1700, 0x1713,
+    0x171F, 0x1733,
+    0x1740, 0x1753,
+    0x1760, 0x176C,
+    0x176E, 0x1770,
+    0x1772, 0x1773,
+    0x1780, 0x17B3,
+    0x17B6, 0x17C8,
+    0x17D7, 0x17D7,
+    0x17DC, 0x17DC,
+    0x1820, 0x1878,
+    0x1880, 0x18AA,
+    0x18B0, 0x18F5,
+    0x1900, 0x191E,
+    0x1920, 0x192B,
+    0x1930, 0x1938,
+    0x1950, 0x196D,
+    0x1970, 0x1974,
+    0x1980, 0x19AB,
+    0x19B0, 0x19C9,
+    0x1A00, 0x1A1B,
+    0x1A20, 0x1A5E,
+    0x1A61, 0x1A74,
+    0x1AA7, 0x1AA7,
+    0x1ABF, 0x1AC0,
+    0x1ACC, 0x1ACE,
+    0x1B00, 0x1B33,
+    0x1B35, 0x1B43,
+    0x1B45, 0x1B4C,
+    0x1B80, 0x1BA9,
+    0x1BAC, 0x1BAF,
+    0x1BBA, 0x1BE5,
+    0x1BE7, 0x1BF1,
+    0x1C00, 0x1C36,
+    0x1C4D, 0x1C4F,
+    0x1C5A, 0x1C7D,
+    0x1C80, 0x1C8A,
+    0x1C90, 0x1CBA,
+    0x1CBD, 0x1CBF,
+    0x1CE9, 0x1CEC,
+    0x1CEE, 0x1CF3,
+    0x1CF5, 0x1CF6,
+    0x1CFA, 0x1CFA,
+    0x1D00, 0x1DBF,
+    0x1DD3, 0x1DF4,
+    0x1E00, 0x1F15,
+    0x1F18, 0x1F1D,
+    0x1F20, 0x1F45,
+    0x1F48, 0x1F4D,
+    0x1F50, 0x1F57,
+    0x1F59, 0x1F59,
+    0x1F5B, 0x1F5B,
+    0x1F5D, 0x1F5D,
+    0x1F5F, 0x1F7D,
+    0x1F80, 0x1FB4,
+    0x1FB6, 0x1FBC,
+    0x1FBE, 0x1FBE,
+    0x1FC2, 0x1FC4,
+    0x1FC6, 0x1FCC,
+    0x1FD0, 0x1FD3,
+    0x1FD6, 0x1FDB,
+    0x1FE0, 0x1FEC,
+    0x1FF2, 0x1FF4,
+    0x1FF6, 0x1FFC,
+    0x2071, 0x2071,
+    0x207F, 0x207F,
+    0x2090, 0x209C,
+    0x2102, 0x2102,
+    0x2107, 0x2107,
+    0x210A, 0x2113,
+    0x2115, 0x2115,
+    0x2119, 0x211D,
+    0x2124, 0x2124,
+    0x2126, 0x2126,
+    0x2128, 0x2128,
+    0x212A, 0x212D,
+    0x212F, 0x2139,
+    0x213C, 0x213F,
+    0x2145, 0x2149,
+    0x214E, 0x214E,
+    0x2160, 0x2188,
+    0x24B6, 0x24E9,
+    0x2C00, 0x2CE4,
+    0x2CEB, 0x2CEE,
+    0x2CF2, 0x2CF3,
+    0x2D00, 0x2D25,
+    0x2D27, 0x2D27,
+    0x2D2D, 0x2D2D,
+    0x2D30, 0x2D67,
+    0x2D6F, 0x2D6F,
+    0x2D80, 0x2D96,
+    0x2DA0, 0x2DA6,
+    0x2DA8, 0x2DAE,
+    0x2DB0, 0x2DB6,
+    0x2DB8, 0x2DBE,
+    0x2DC0, 0x2DC6,
+    0x2DC8, 0x2DCE,
+    0x2DD0, 0x2DD6,
+    0x2DD8, 0x2DDE,
+    0x2DE0, 0x2DFF,
+    0x2E2F, 0x2E2F,
+    0x3005, 0x3007,
+    0x3021, 0x3029,
+    0x3031, 0x3035,
+    0x3038, 0x303C,
+    0x3041, 0x3096,
+    0x309D, 0x309F,
+    0x30A1, 0x30FA,
+    0x30FC, 0x30FF,
+    0x3105, 0x312F,
+    0x3131, 0x318E,
+    0x31A0, 0x31BF,
+    0x31F0, 0x31FF,
+    0x3400, 0x4DBF,
+    0x4E00, 0xA48C,
+    0xA4D0, 0xA4FD,
+    0xA500, 0xA60C,
+    0xA610, 0xA61F,
+    0xA62A, 0xA62B,
+    0xA640, 0xA66E,
+    0xA674, 0xA67B,
+    0xA67F, 0xA6EF,
+    0xA717, 0xA71F,
+    0xA722, 0xA788,
+    0xA78B, 0xA7DC,
+    0xA7F1, 0xA805,
+    0xA807, 0xA827,
+    0xA840, 0xA873,
+    0xA880, 0xA8C3,
+    0xA8C5, 0xA8C5,
+    0xA8F2, 0xA8F7,
+    0xA8FB, 0xA8FB,
+    0xA8FD, 0xA8FF,
+    0xA90A, 0xA92A,
+    0xA930, 0xA952,
+    0xA960, 0xA97C,
+    0xA980, 0xA9B2,
+    0xA9B4, 0xA9BF,
+    0xA9CF, 0xA9CF,
+    0xA9E0, 0xA9EF,
+    0xA9FA, 0xA9FE,
+    0xAA00, 0xAA36,
+    0xAA40, 0xAA4D,
+    0xAA60, 0xAA76,
+    0xAA7A, 0xAABE,
+    0xAAC0, 0xAAC0,
+    0xAAC2, 0xAAC2,
+    0xAADB, 0xAADD,
+    0xAAE0, 0xAAEF,
+    0xAAF2, 0xAAF5,
+    0xAB01, 0xAB06,
+    0xAB09, 0xAB0E,
+    0xAB11, 0xAB16,
+    0xAB20, 0xAB26,
+    0xAB28, 0xAB2E,
+    0xAB30, 0xAB5A,
+    0xAB5C, 0xAB69,
+    0xAB70, 0xABEA,
+    0xAC00, 0xD7A3,
+    0xD7B0, 0xD7C6,
+    0xD7CB, 0xD7FB,
+    0xF900, 0xFA6D,
+    0xFA70, 0xFAD9,
+    0xFB00, 0xFB06,
+    0xFB13, 0xFB17,
+    0xFB1D, 0xFB28,
+    0xFB2A, 0xFB36,
+    0xFB38, 0xFB3C,
+    0xFB3E, 0xFB3E,
+    0xFB40, 0xFB41,
+    0xFB43, 0xFB44,
+    0xFB46, 0xFBB1,
+    0xFBD3, 0xFD3D,
+    0xFD50, 0xFD8F,
+    0xFD92, 0xFDC7,
+    0xFDF0, 0xFDFB,
+    0xFE70, 0xFE74,
+    0xFE76, 0xFEFC,
+    0xFF21, 0xFF3A,
+    0xFF41, 0xFF5A,
+    0xFF66, 0xFFBE,
+    0xFFC2, 0xFFC7,
+    0xFFCA, 0xFFCF,
+    0xFFD2, 0xFFD7,
+    0xFFDA, 0xFFDC,
+    0x10000, 0x1000B,
+    0x1000D, 0x10026,
+    0x10028, 0x1003A,
+    0x1003C, 0x1003D,
+    0x1003F, 0x1004D,
+    0x10050, 0x1005D,
+    0x10080, 0x100FA,
+    0x10140, 0x10174,
+    0x10280, 0x1029C,
+    0x102A0, 0x102D0,
+    0x10300, 0x1031F,
+    0x1032D, 0x1034A,
+    0x10350, 0x1037A,
+    0x10380, 0x1039D,
+    0x103A0, 0x103C3,
+    0x103C8, 0x103CF,
+    0x103D1, 0x103D5,
+    0x10400, 0x1049D,
+    0x104B0, 0x104D3,
+    0x104D8, 0x104FB,
+    0x10500, 0x10527,
+    0x10530, 0x10563,
+    0x10570, 0x1057A,
+    0x1057C, 0x1058A,
+    0x1058C, 0x10592,
+    0x10594, 0x10595,
+    0x10597, 0x105A1,
+    0x105A3, 0x105B1,
+    0x105B3, 0x105B9,
+    0x105BB, 0x105BC,
+    0x105C0, 0x105F3,
+    0x10600, 0x10736,
+    0x10740, 0x10755,
+    0x10760, 0x10767,
+    0x10780, 0x10785,
+    0x10787, 0x107B0,
+    0x107B2, 0x107BA,
+    0x10800, 0x10805,
+    0x10808, 0x10808,
+    0x1080A, 0x10835,
+    0x10837, 0x10838,
+    0x1083C, 0x1083C,
+    0x1083F, 0x10855,
+    0x10860, 0x10876,
+    0x10880, 0x1089E,
+    0x108E0, 0x108F2,
+    0x108F4, 0x108F5,
+    0x10900, 0x10915,
+    0x10920, 0x10939,
+    0x10940, 0x10959,
+    0x10980, 0x109B7,
+    0x109BE, 0x109BF,
+    0x10A00, 0x10A03,
+    0x10A05, 0x10A06,
+    0x10A0C, 0x10A13,
+    0x10A15, 0x10A17,
+    0x10A19, 0x10A35,
+    0x10A60, 0x10A7C,
+    0x10A80, 0x10A9C,
+    0x10AC0, 0x10AC7,
+    0x10AC9, 0x10AE4,
+    0x10B00, 0x10B35,
+    0x10B40, 0x10B55,
+    0x10B60, 0x10B72,
+    0x10B80, 0x10B91,
+    0x10C00, 0x10C48,
+    0x10C80, 0x10CB2,
+    0x10CC0, 0x10CF2,
+    0x10D00, 0x10D27,
+    0x10D4A, 0x10D65,
+    0x10D69, 0x10D69,
+    0x10D6F, 0x10D85,
+    0x10E80, 0x10EA9,
+    0x10EAB, 0x10EAC,
+    0x10EB0, 0x10EB1,
+    0x10EC2, 0x10EC7,
+    0x10EFA, 0x10EFC,
+    0x10F00, 0x10F1C,
+    0x10F27, 0x10F27,
+    0x10F30, 0x10F45,
+    0x10F70, 0x10F81,
+    0x10FB0, 0x10FC4,
+    0x10FE0, 0x10FF6,
+    0x11000, 0x11045,
+    0x11071, 0x11075,
+    0x11080, 0x110B8,
+    0x110C2, 0x110C2,
+    0x110D0, 0x110E8,
+    0x11100, 0x11132,
+    0x11144, 0x11147,
+    0x11150, 0x11172,
+    0x11176, 0x11176,
+    0x11180, 0x111BF,
+    0x111C1, 0x111C4,
+    0x111CE, 0x111CF,
+    0x111DA, 0x111DA,
+    0x111DC, 0x111DC,
+    0x11200, 0x11211,
+    0x11213, 0x11234,
+    0x11237, 0x11237,
+    0x1123E, 0x11241,
+    0x11280, 0x11286,
+    0x11288, 0x11288,
+    0x1128A, 0x1128D,
+    0x1128F, 0x1129D,
+    0x1129F, 0x112A8,
+    0x112B0, 0x112E8,
+    0x11300, 0x11303,
+    0x11305, 0x1130C,
+    0x1130F, 0x11310,
+    0x11313, 0x11328,
+    0x1132A, 0x11330,
+    0x11332, 0x11333,
+    0x11335, 0x11339,
+    0x1133D, 0x11344,
+    0x11347, 0x11348,
+    0x1134B, 0x1134C,
+    0x11350, 0x11350,
+    0x11357, 0x11357,
+    0x1135D, 0x11363,
+    0x11380, 0x11389,
+    0x1138B, 0x1138B,
+    0x1138E, 0x1138E,
+    0x11390, 0x113B5,
+    0x113B7, 0x113C0,
+    0x113C2, 0x113C2,
+    0x113C5, 0x113C5,
+    0x113C7, 0x113CA,
+    0x113CC, 0x113CD,
+    0x113D1, 0x113D1,
+    0x113D3, 0x113D3,
+    0x11400, 0x11441,
+    0x11443, 0x11445,
+    0x11447, 0x1144A,
+    0x1145F, 0x11461,
+    0x11480, 0x114C1,
+    0x114C4, 0x114C5,
+    0x114C7, 0x114C7,
+    0x11580, 0x115B5,
+    0x115B8, 0x115BE,
+    0x115D8, 0x115DD,
+    0x11600, 0x1163E,
+    0x11640, 0x11640,
+    0x11644, 0x11644,
+    0x11680, 0x116B5,
+    0x116B8, 0x116B8,
+    0x11700, 0x1171A,
+    0x1171D, 0x1172A,
+    0x11740, 0x11746,
+    0x11800, 0x11838,
+    0x118A0, 0x118DF,
+    0x118FF, 0x11906,
+    0x11909, 0x11909,
+    0x1190C, 0x11913,
+    0x11915, 0x11916,
+    0x11918, 0x11935,
+    0x11937, 0x11938,
+    0x1193B, 0x1193C,
+    0x1193F, 0x11942,
+    0x119A0, 0x119A7,
+    0x119AA, 0x119D7,
+    0x119DA, 0x119DF,
+    0x119E1, 0x119E1,
+    0x119E3, 0x119E4,
+    0x11A00, 0x11A32,
+    0x11A35, 0x11A3E,
+    0x11A50, 0x11A97,
+    0x11A9D, 0x11A9D,
+    0x11AB0, 0x11AF8,
+    0x11B60, 0x11B67,
+    0x11BC0, 0x11BE0,
+    0x11C00, 0x11C08,
+    0x11C0A, 0x11C36,
+    0x11C38, 0x11C3E,
+    0x11C40, 0x11C40,
+    0x11C72, 0x11C8F,
+    0x11C92, 0x11CA7,
+    0x11CA9, 0x11CB6,
+    0x11D00, 0x11D06,
+    0x11D08, 0x11D09,
+    0x11D0B, 0x11D36,
+    0x11D3A, 0x11D3A,
+    0x11D3C, 0x11D3D,
+    0x11D3F, 0x11D41,
+    0x11D43, 0x11D43,
+    0x11D46, 0x11D47,
+    0x11D60, 0x11D65,
+    0x11D67, 0x11D68,
+    0x11D6A, 0x11D8E,
+    0x11D90, 0x11D91,
+    0x11D93, 0x11D96,
+    0x11D98, 0x11D98,
+    0x11DB0, 0x11DDB,
+    0x11EE0, 0x11EF6,
+    0x11F00, 0x11F10,
+    0x11F12, 0x11F3A,
+    0x11F3E, 0x11F40,
+    0x11FB0, 0x11FB0,
+    0x12000, 0x12399,
+    0x12400, 0x1246E,
+    0x12480, 0x12543,
+    0x12F90, 0x12FF0,
+    0x13000, 0x1342F,
+    0x13441, 0x13446,
+    0x13460, 0x143FA,
+    0x14400, 0x14646,
+    0x16100, 0x1612E,
+    0x16800, 0x16A38,
+    0x16A40, 0x16A5E,
+    0x16A70, 0x16ABE,
+    0x16AD0, 0x16AED,
+    0x16B00, 0x16B2F,
+    0x16B40, 0x16B43,
+    0x16B63, 0x16B77,
+    0x16B7D, 0x16B8F,
+    0x16D40, 0x16D6C,
+    0x16E40, 0x16E7F,
+    0x16EA0, 0x16EB8,
+    0x16EBB, 0x16ED3,
+    0x16F00, 0x16F4A,
+    0x16F4F, 0x16F87,
+    0x16F8F, 0x16F9F,
+    0x16FE0, 0x16FE1,
+    0x16FE3, 0x16FE3,
+    0x16FF0, 0x16FF6,
+    0x17000, 0x18CD5,
+    0x18CFF, 0x18D1E,
+    0x18D80, 0x18DF2,
+    0x1AFF0, 0x1AFF3,
+    0x1AFF5, 0x1AFFB,
+    0x1AFFD, 0x1AFFE,
+    0x1B000, 0x1B122,
+    0x1B132, 0x1B132,
+    0x1B150, 0x1B152,
+    0x1B155, 0x1B155,
+    0x1B164, 0x1B167,
+    0x1B170, 0x1B2FB,
+    0x1BC00, 0x1BC6A,
+    0x1BC70, 0x1BC7C,
+    0x1BC80, 0x1BC88,
+    0x1BC90, 0x1BC99,
+    0x1BC9E, 0x1BC9E,
+    0x1D400, 0x1D454,
+    0x1D456, 0x1D49C,
+    0x1D49E, 0x1D49F,
+    0x1D4A2, 0x1D4A2,
+    0x1D4A5, 0x1D4A6,
+    0x1D4A9, 0x1D4AC,
+    0x1D4AE, 0x1D4B9,
+    0x1D4BB, 0x1D4BB,
+    0x1D4BD, 0x1D4C3,
+    0x1D4C5, 0x1D505,
+    0x1D507, 0x1D50A,
+    0x1D50D, 0x1D514,
+    0x1D516, 0x1D51C,
+    0x1D51E, 0x1D539,
+    0x1D53B, 0x1D53E,
+    0x1D540, 0x1D544,
+    0x1D546, 0x1D546,
+    0x1D54A, 0x1D550,
+    0x1D552, 0x1D6A5,
+    0x1D6A8, 0x1D6C0,
+    0x1D6C2, 0x1D6DA,
+    0x1D6DC, 0x1D6FA,
+    0x1D6FC, 0x1D714,
+    0x1D716, 0x1D734,
+    0x1D736, 0x1D74E,
+    0x1D750, 0x1D76E,
+    0x1D770, 0x1D788,
+    0x1D78A, 0x1D7A8,
+    0x1D7AA, 0x1D7C2,
+    0x1D7C4, 0x1D7CB,
+    0x1DF00, 0x1DF1E,
+    0x1DF25, 0x1DF2A,
+    0x1E000, 0x1E006,
+    0x1E008, 0x1E018,
+    0x1E01B, 0x1E021,
+    0x1E023, 0x1E024,
+    0x1E026, 0x1E02A,
+    0x1E030, 0x1E06D,
+    0x1E08F, 0x1E08F,
+    0x1E100, 0x1E12C,
+    0x1E137, 0x1E13D,
+    0x1E14E, 0x1E14E,
+    0x1E290, 0x1E2AD,
+    0x1E2C0, 0x1E2EB,
+    0x1E4D0, 0x1E4EB,
+    0x1E5D0, 0x1E5ED,
+    0x1E5F0, 0x1E5F0,
+    0x1E6C0, 0x1E6DE,
+    0x1E6E0, 0x1E6F5,
+    0x1E6FE, 0x1E6FF,
+    0x1E7E0, 0x1E7E6,
+    0x1E7E8, 0x1E7EB,
+    0x1E7ED, 0x1E7EE,
+    0x1E7F0, 0x1E7FE,
+    0x1E800, 0x1E8C4,
+    0x1E900, 0x1E943,
+    0x1E947, 0x1E947,
+    0x1E94B, 0x1E94B,
+    0x1EE00, 0x1EE03,
+    0x1EE05, 0x1EE1F,
+    0x1EE21, 0x1EE22,
+    0x1EE24, 0x1EE24,
+    0x1EE27, 0x1EE27,
+    0x1EE29, 0x1EE32,
+    0x1EE34, 0x1EE37,
+    0x1EE39, 0x1EE39,
+    0x1EE3B, 0x1EE3B,
+    0x1EE42, 0x1EE42,
+    0x1EE47, 0x1EE47,
+    0x1EE49, 0x1EE49,
+    0x1EE4B, 0x1EE4B,
+    0x1EE4D, 0x1EE4F,
+    0x1EE51, 0x1EE52,
+    0x1EE54, 0x1EE54,
+    0x1EE57, 0x1EE57,
+    0x1EE59, 0x1EE59,
+    0x1EE5B, 0x1EE5B,
+    0x1EE5D, 0x1EE5D,
+    0x1EE5F, 0x1EE5F,
+    0x1EE61, 0x1EE62,
+    0x1EE64, 0x1EE64,
+    0x1EE67, 0x1EE6A,
+    0x1EE6C, 0x1EE72,
+    0x1EE74, 0x1EE77,
+    0x1EE79, 0x1EE7C,
+    0x1EE7E, 0x1EE7E,
+    0x1EE80, 0x1EE89,
+    0x1EE8B, 0x1EE9B,
+    0x1EEA1, 0x1EEA3,
+    0x1EEA5, 0x1EEA9,
+    0x1EEAB, 0x1EEBB,
+    0x1F130, 0x1F149,
+    0x1F150, 0x1F169,
+    0x1F170, 0x1F189,
+    0x20000, 0x2A6DF,
+    0x2A700, 0x2B81D,
+    0x2B820, 0x2CEAD,
+    0x2CEB0, 0x2EBE0,
+    0x2EBF0, 0x2EE5D,
+    0x2F800, 0x2FA1D,
+    0x30000, 0x3134A,
+    0x31350, 0x33479,
+};
+
+#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1598 // Element count of unicode_alnum_codepoints: 799 rows of 2 values each.
+static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = { // Rows appear in ascending order starting at 0x100. NOTE(review): each row looks like an inclusive (first, last) codepoint range (e.g. 0x2EC, 0x2EC for a single codepoint) -- confirm against the lookup code.
+    0x100, 0x2C1,
+    0x2C6, 0x2D1,
+    0x2E0, 0x2E4,
+    0x2EC, 0x2EC,
+    0x2EE, 0x2EE,
+    0x345, 0x345,
+    0x363, 0x374,
+    0x376, 0x377,
+    0x37A, 0x37D,
+    0x37F, 0x37F,
+    0x386, 0x386,
+    0x388, 0x38A,
+    0x38C, 0x38C,
+    0x38E, 0x3A1,
+    0x3A3, 0x3F5,
+    0x3F7, 0x481,
+    0x48A, 0x52F,
+    0x531, 0x556,
+    0x559, 0x559,
+    0x560, 0x588,
+    0x5B0, 0x5BD,
+    0x5BF, 0x5BF,
+    0x5C1, 0x5C2,
+    0x5C4, 0x5C5,
+    0x5C7, 0x5C7,
+    0x5D0, 0x5EA,
+    0x5EF, 0x5F2,
+    0x610, 0x61A,
+    0x620, 0x657,
+    0x659, 0x669,
+    0x66E, 0x6D3,
+    0x6D5, 0x6DC,
+    0x6E1, 0x6E8,
+    0x6ED, 0x6FC,
+    0x6FF, 0x6FF,
+    0x710, 0x73F,
+    0x74D, 0x7B1,
+    0x7C0, 0x7EA,
+    0x7F4, 0x7F5,
+    0x7FA, 0x7FA,
+    0x800, 0x817,
+    0x81A, 0x82C,
+    0x840, 0x858,
+    0x860, 0x86A,
+    0x870, 0x887,
+    0x889, 0x88F,
+    0x897, 0x897,
+    0x8A0, 0x8C9,
+    0x8D4, 0x8DF,
+    0x8E3, 0x8E9,
+    0x8F0, 0x93B,
+    0x93D, 0x94C,
+    0x94E, 0x950,
+    0x955, 0x963,
+    0x966, 0x96F,
+    0x971, 0x983,
+    0x985, 0x98C,
+    0x98F, 0x990,
+    0x993, 0x9A8,
+    0x9AA, 0x9B0,
+    0x9B2, 0x9B2,
+    0x9B6, 0x9B9,
+    0x9BD, 0x9C4,
+    0x9C7, 0x9C8,
+    0x9CB, 0x9CC,
+    0x9CE, 0x9CE,
+    0x9D7, 0x9D7,
+    0x9DC, 0x9DD,
+    0x9DF, 0x9E3,
+    0x9E6, 0x9F1,
+    0x9FC, 0x9FC,
+    0xA01, 0xA03,
+    0xA05, 0xA0A,
+    0xA0F, 0xA10,
+    0xA13, 0xA28,
+    0xA2A, 0xA30,
+    0xA32, 0xA33,
+    0xA35, 0xA36,
+    0xA38, 0xA39,
+    0xA3E, 0xA42,
+    0xA47, 0xA48,
+    0xA4B, 0xA4C,
+    0xA51, 0xA51,
+    0xA59, 0xA5C,
+    0xA5E, 0xA5E,
+    0xA66, 0xA75,
+    0xA81, 0xA83,
+    0xA85, 0xA8D,
+    0xA8F, 0xA91,
+    0xA93, 0xAA8,
+    0xAAA, 0xAB0,
+    0xAB2, 0xAB3,
+    0xAB5, 0xAB9,
+    0xABD, 0xAC5,
+    0xAC7, 0xAC9,
+    0xACB, 0xACC,
+    0xAD0, 0xAD0,
+    0xAE0, 0xAE3,
+    0xAE6, 0xAEF,
+    0xAF9, 0xAFC,
+    0xB01, 0xB03,
+    0xB05, 0xB0C,
+    0xB0F, 0xB10,
+    0xB13, 0xB28,
+    0xB2A, 0xB30,
+    0xB32, 0xB33,
+    0xB35, 0xB39,
+    0xB3D, 0xB44,
+    0xB47, 0xB48,
+    0xB4B, 0xB4C,
+    0xB56, 0xB57,
+    0xB5C, 0xB5D,
+    0xB5F, 0xB63,
+    0xB66, 0xB6F,
+    0xB71, 0xB71,
+    0xB82, 0xB83,
+    0xB85, 0xB8A,
+    0xB8E, 0xB90,
+    0xB92, 0xB95,
+    0xB99, 0xB9A,
+    0xB9C, 0xB9C,
+    0xB9E, 0xB9F,
+    0xBA3, 0xBA4,
+    0xBA8, 0xBAA,
+    0xBAE, 0xBB9,
+    0xBBE, 0xBC2,
+    0xBC6, 0xBC8,
+    0xBCA, 0xBCC,
+    0xBD0, 0xBD0,
+    0xBD7, 0xBD7,
+    0xBE6, 0xBEF,
+    0xC00, 0xC0C,
+    0xC0E, 0xC10,
+    0xC12, 0xC28,
+    0xC2A, 0xC39,
+    0xC3D, 0xC44,
+    0xC46, 0xC48,
+    0xC4A, 0xC4C,
+    0xC55, 0xC56,
+    0xC58, 0xC5A,
+    0xC5C, 0xC5D,
+    0xC60, 0xC63,
+    0xC66, 0xC6F,
+    0xC80, 0xC83,
+    0xC85, 0xC8C,
+    0xC8E, 0xC90,
+    0xC92, 0xCA8,
+    0xCAA, 0xCB3,
+    0xCB5, 0xCB9,
+    0xCBD, 0xCC4,
+    0xCC6, 0xCC8,
+    0xCCA, 0xCCC,
+    0xCD5, 0xCD6,
+    0xCDC, 0xCDE,
+    0xCE0, 0xCE3,
+    0xCE6, 0xCEF,
+    0xCF1, 0xCF3,
+    0xD00, 0xD0C,
+    0xD0E, 0xD10,
+    0xD12, 0xD3A,
+    0xD3D, 0xD44,
+    0xD46, 0xD48,
+    0xD4A, 0xD4C,
+    0xD4E, 0xD4E,
+    0xD54, 0xD57,
+    0xD5F, 0xD63,
+    0xD66, 0xD6F,
+    0xD7A, 0xD7F,
+    0xD81, 0xD83,
+    0xD85, 0xD96,
+    0xD9A, 0xDB1,
+    0xDB3, 0xDBB,
+    0xDBD, 0xDBD,
+    0xDC0, 0xDC6,
+    0xDCF, 0xDD4,
+    0xDD6, 0xDD6,
+    0xDD8, 0xDDF,
+    0xDE6, 0xDEF,
+    0xDF2, 0xDF3,
+    0xE01, 0xE3A,
+    0xE40, 0xE46,
+    0xE4D, 0xE4D,
+    0xE50, 0xE59,
+    0xE81, 0xE82,
+    0xE84, 0xE84,
+    0xE86, 0xE8A,
+    0xE8C, 0xEA3,
+    0xEA5, 0xEA5,
+    0xEA7, 0xEB9,
+    0xEBB, 0xEBD,
+    0xEC0, 0xEC4,
+    0xEC6, 0xEC6,
+    0xECD, 0xECD,
+    0xED0, 0xED9,
+    0xEDC, 0xEDF,
+    0xF00, 0xF00,
+    0xF20, 0xF29,
+    0xF40, 0xF47,
+    0xF49, 0xF6C,
+    0xF71, 0xF83,
+    0xF88, 0xF97,
+    0xF99, 0xFBC,
+    0x1000, 0x1036,
+    0x1038, 0x1038,
+    0x103B, 0x1049,
+    0x1050, 0x109D,
+    0x10A0, 0x10C5,
+    0x10C7, 0x10C7,
+    0x10CD, 0x10CD,
+    0x10D0, 0x10FA,
+    0x10FC, 0x1248,
+    0x124A, 0x124D,
+    0x1250, 0x1256,
+    0x1258, 0x1258,
+    0x125A, 0x125D,
+    0x1260, 0x1288,
+    0x128A, 0x128D,
+    0x1290, 0x12B0,
+    0x12B2, 0x12B5,
+    0x12B8, 0x12BE,
+    0x12C0, 0x12C0,
+    0x12C2, 0x12C5,
+    0x12C8, 0x12D6,
+    0x12D8, 0x1310,
+    0x1312, 0x1315,
+    0x1318, 0x135A,
+    0x1380, 0x138F,
+    0x13A0, 0x13F5,
+    0x13F8, 0x13FD,
+    0x1401, 0x166C,
+    0x166F, 0x167F,
+    0x1681, 0x169A,
+    0x16A0, 0x16EA,
+    0x16EE, 0x16F8,
+    0x1700, 0x1713,
+    0x171F, 0x1733,
+    0x1740, 0x1753,
+    0x1760, 0x176C,
+    0x176E, 0x1770,
+    0x1772, 0x1773,
+    0x1780, 0x17B3,
+    0x17B6, 0x17C8,
+    0x17D7, 0x17D7,
+    0x17DC, 0x17DC,
+    0x17E0, 0x17E9,
+    0x1810, 0x1819,
+    0x1820, 0x1878,
+    0x1880, 0x18AA,
+    0x18B0, 0x18F5,
+    0x1900, 0x191E,
+    0x1920, 0x192B,
+    0x1930, 0x1938,
+    0x1946, 0x196D,
+    0x1970, 0x1974,
+    0x1980, 0x19AB,
+    0x19B0, 0x19C9,
+    0x19D0, 0x19D9,
+    0x1A00, 0x1A1B,
+    0x1A20, 0x1A5E,
+    0x1A61, 0x1A74,
+    0x1A80, 0x1A89,
+    0x1A90, 0x1A99,
+    0x1AA7, 0x1AA7,
+    0x1ABF, 0x1AC0,
+    0x1ACC, 0x1ACE,
+    0x1B00, 0x1B33,
+    0x1B35, 0x1B43,
+    0x1B45, 0x1B4C,
+    0x1B50, 0x1B59,
+    0x1B80, 0x1BA9,
+    0x1BAC, 0x1BE5,
+    0x1BE7, 0x1BF1,
+    0x1C00, 0x1C36,
+    0x1C40, 0x1C49,
+    0x1C4D, 0x1C7D,
+    0x1C80, 0x1C8A,
+    0x1C90, 0x1CBA,
+    0x1CBD, 0x1CBF,
+    0x1CE9, 0x1CEC,
+    0x1CEE, 0x1CF3,
+    0x1CF5, 0x1CF6,
+    0x1CFA, 0x1CFA,
+    0x1D00, 0x1DBF,
+    0x1DD3, 0x1DF4,
+    0x1E00, 0x1F15,
+    0x1F18, 0x1F1D,
+    0x1F20, 0x1F45,
+    0x1F48, 0x1F4D,
+    0x1F50, 0x1F57,
+    0x1F59, 0x1F59,
+    0x1F5B, 0x1F5B,
+    0x1F5D, 0x1F5D,
+    0x1F5F, 0x1F7D,
+    0x1F80, 0x1FB4,
+    0x1FB6, 0x1FBC,
+    0x1FBE, 0x1FBE,
+    0x1FC2, 0x1FC4,
+    0x1FC6, 0x1FCC,
+    0x1FD0, 0x1FD3,
+    0x1FD6, 0x1FDB,
+    0x1FE0, 0x1FEC,
+    0x1FF2, 0x1FF4,
+    0x1FF6, 0x1FFC,
+    0x2071, 0x2071,
+    0x207F, 0x207F,
+    0x2090, 0x209C,
+    0x2102, 0x2102,
+    0x2107, 0x2107,
+    0x210A, 0x2113,
+    0x2115, 0x2115,
+    0x2119, 0x211D,
+    0x2124, 0x2124,
+    0x2126, 0x2126,
+    0x2128, 0x2128,
+    0x212A, 0x212D,
+    0x212F, 0x2139,
+    0x213C, 0x213F,
+    0x2145, 0x2149,
+    0x214E, 0x214E,
+    0x2160, 0x2188,
+    0x24B6, 0x24E9,
+    0x2C00, 0x2CE4,
+    0x2CEB, 0x2CEE,
+    0x2CF2, 0x2CF3,
+    0x2D00, 0x2D25,
+    0x2D27, 0x2D27,
+    0x2D2D, 0x2D2D,
+    0x2D30, 0x2D67,
+    0x2D6F, 0x2D6F,
+    0x2D80, 0x2D96,
+    0x2DA0, 0x2DA6,
+    0x2DA8, 0x2DAE,
+    0x2DB0, 0x2DB6,
+    0x2DB8, 0x2DBE,
+    0x2DC0, 0x2DC6,
+    0x2DC8, 0x2DCE,
+    0x2DD0, 0x2DD6,
+    0x2DD8, 0x2DDE,
+    0x2DE0, 0x2DFF,
+    0x2E2F, 0x2E2F,
+    0x3005, 0x3007,
+    0x3021, 0x3029,
+    0x3031, 0x3035,
+    0x3038, 0x303C,
+    0x3041, 0x3096,
+    0x309D, 0x309F,
+    0x30A1, 0x30FA,
+    0x30FC, 0x30FF,
+    0x3105, 0x312F,
+    0x3131, 0x318E,
+    0x31A0, 0x31BF,
+    0x31F0, 0x31FF,
+    0x3400, 0x4DBF,
+    0x4E00, 0xA48C,
+    0xA4D0, 0xA4FD,
+    0xA500, 0xA60C,
+    0xA610, 0xA62B,
+    0xA640, 0xA66E,
+    0xA674, 0xA67B,
+    0xA67F, 0xA6EF,
+    0xA717, 0xA71F,
+    0xA722, 0xA788,
+    0xA78B, 0xA7DC,
+    0xA7F1, 0xA805,
+    0xA807, 0xA827,
+    0xA840, 0xA873,
+    0xA880, 0xA8C3,
+    0xA8C5, 0xA8C5,
+    0xA8D0, 0xA8D9,
+    0xA8F2, 0xA8F7,
+    0xA8FB, 0xA8FB,
+    0xA8FD, 0xA92A,
+    0xA930, 0xA952,
+    0xA960, 0xA97C,
+    0xA980, 0xA9B2,
+    0xA9B4, 0xA9BF,
+    0xA9CF, 0xA9D9,
+    0xA9E0, 0xA9FE,
+    0xAA00, 0xAA36,
+    0xAA40, 0xAA4D,
+    0xAA50, 0xAA59,
+    0xAA60, 0xAA76,
+    0xAA7A, 0xAABE,
+    0xAAC0, 0xAAC0,
+    0xAAC2, 0xAAC2,
+    0xAADB, 0xAADD,
+    0xAAE0, 0xAAEF,
+    0xAAF2, 0xAAF5,
+    0xAB01, 0xAB06,
+    0xAB09, 0xAB0E,
+    0xAB11, 0xAB16,
+    0xAB20, 0xAB26,
+    0xAB28, 0xAB2E,
+    0xAB30, 0xAB5A,
+    0xAB5C, 0xAB69,
+    0xAB70, 0xABEA,
+    0xABF0, 0xABF9,
+    0xAC00, 0xD7A3,
+    0xD7B0, 0xD7C6,
+    0xD7CB, 0xD7FB,
+    0xF900, 0xFA6D,
+    0xFA70, 0xFAD9,
+    0xFB00, 0xFB06,
+    0xFB13, 0xFB17,
+    0xFB1D, 0xFB28,
+    0xFB2A, 0xFB36,
+    0xFB38, 0xFB3C,
+    0xFB3E, 0xFB3E,
+    0xFB40, 0xFB41,
+    0xFB43, 0xFB44,
+    0xFB46, 0xFBB1,
+    0xFBD3, 0xFD3D,
+    0xFD50, 0xFD8F,
+    0xFD92, 0xFDC7,
+    0xFDF0, 0xFDFB,
+    0xFE70, 0xFE74,
+    0xFE76, 0xFEFC,
+    0xFF10, 0xFF19,
+    0xFF21, 0xFF3A,
+    0xFF41, 0xFF5A,
+    0xFF66, 0xFFBE,
+    0xFFC2, 0xFFC7,
+    0xFFCA, 0xFFCF,
+    0xFFD2, 0xFFD7,
+    0xFFDA, 0xFFDC,
+    0x10000, 0x1000B,
+    0x1000D, 0x10026,
+    0x10028, 0x1003A,
+    0x1003C, 0x1003D,
+    0x1003F, 0x1004D,
+    0x10050, 0x1005D,
+    0x10080, 0x100FA,
+    0x10140, 0x10174,
+    0x10280, 0x1029C,
+    0x102A0, 0x102D0,
+    0x10300, 0x1031F,
+    0x1032D, 0x1034A,
+    0x10350, 0x1037A,
+    0x10380, 0x1039D,
+    0x103A0, 0x103C3,
+    0x103C8, 0x103CF,
+    0x103D1, 0x103D5,
+    0x10400, 0x1049D,
+    0x104A0, 0x104A9,
+    0x104B0, 0x104D3,
+    0x104D8, 0x104FB,
+    0x10500, 0x10527,
+    0x10530, 0x10563,
+    0x10570, 0x1057A,
+    0x1057C, 0x1058A,
+    0x1058C, 0x10592,
+    0x10594, 0x10595,
+    0x10597, 0x105A1,
+    0x105A3, 0x105B1,
+    0x105B3, 0x105B9,
+    0x105BB, 0x105BC,
+    0x105C0, 0x105F3,
+    0x10600, 0x10736,
+    0x10740, 0x10755,
+    0x10760, 0x10767,
+    0x10780, 0x10785,
+    0x10787, 0x107B0,
+    0x107B2, 0x107BA,
+    0x10800, 0x10805,
+    0x10808, 0x10808,
+    0x1080A, 0x10835,
+    0x10837, 0x10838,
+    0x1083C, 0x1083C,
+    0x1083F, 0x10855,
+    0x10860, 0x10876,
+    0x10880, 0x1089E,
+    0x108E0, 0x108F2,
+    0x108F4, 0x108F5,
+    0x10900, 0x10915,
+    0x10920, 0x10939,
+    0x10940, 0x10959,
+    0x10980, 0x109B7,
+    0x109BE, 0x109BF,
+    0x10A00, 0x10A03,
+    0x10A05, 0x10A06,
+    0x10A0C, 0x10A13,
+    0x10A15, 0x10A17,
+    0x10A19, 0x10A35,
+    0x10A60, 0x10A7C,
+    0x10A80, 0x10A9C,
+    0x10AC0, 0x10AC7,
+    0x10AC9, 0x10AE4,
+    0x10B00, 0x10B35,
+    0x10B40, 0x10B55,
+    0x10B60, 0x10B72,
+    0x10B80, 0x10B91,
+    0x10C00, 0x10C48,
+    0x10C80, 0x10CB2,
+    0x10CC0, 0x10CF2,
+    0x10D00, 0x10D27,
+    0x10D30, 0x10D39,
+    0x10D40, 0x10D65,
+    0x10D69, 0x10D69,
+    0x10D6F, 0x10D85,
+    0x10E80, 0x10EA9,
+    0x10EAB, 0x10EAC,
+    0x10EB0, 0x10EB1,
+    0x10EC2, 0x10EC7,
+    0x10EFA, 0x10EFC,
+    0x10F00, 0x10F1C,
+    0x10F27, 0x10F27,
+    0x10F30, 0x10F45,
+    0x10F70, 0x10F81,
+    0x10FB0, 0x10FC4,
+    0x10FE0, 0x10FF6,
+    0x11000, 0x11045,
+    0x11066, 0x1106F,
+    0x11071, 0x11075,
+    0x11080, 0x110B8,
+    0x110C2, 0x110C2,
+    0x110D0, 0x110E8,
+    0x110F0, 0x110F9,
+    0x11100, 0x11132,
+    0x11136, 0x1113F,
+    0x11144, 0x11147,
+    0x11150, 0x11172,
+    0x11176, 0x11176,
+    0x11180, 0x111BF,
+    0x111C1, 0x111C4,
+    0x111CE, 0x111DA,
+    0x111DC, 0x111DC,
+    0x11200, 0x11211,
+    0x11213, 0x11234,
+    0x11237, 0x11237,
+    0x1123E, 0x11241,
+    0x11280, 0x11286,
+    0x11288, 0x11288,
+    0x1128A, 0x1128D,
+    0x1128F, 0x1129D,
+    0x1129F, 0x112A8,
+    0x112B0, 0x112E8,
+    0x112F0, 0x112F9,
+    0x11300, 0x11303,
+    0x11305, 0x1130C,
+    0x1130F, 0x11310,
+    0x11313, 0x11328,
+    0x1132A, 0x11330,
+    0x11332, 0x11333,
+    0x11335, 0x11339,
+    0x1133D, 0x11344,
+    0x11347, 0x11348,
+    0x1134B, 0x1134C,
+    0x11350, 0x11350,
+    0x11357, 0x11357,
+    0x1135D, 0x11363,
+    0x11380, 0x11389,
+    0x1138B, 0x1138B,
+    0x1138E, 0x1138E,
+    0x11390, 0x113B5,
+    0x113B7, 0x113C0,
+    0x113C2, 0x113C2,
+    0x113C5, 0x113C5,
+    0x113C7, 0x113CA,
+    0x113CC, 0x113CD,
+    0x113D1, 0x113D1,
+    0x113D3, 0x113D3,
+    0x11400, 0x11441,
+    0x11443, 0x11445,
+    0x11447, 0x1144A,
+    0x11450, 0x11459,
+    0x1145F, 0x11461,
+    0x11480, 0x114C1,
+    0x114C4, 0x114C5,
+    0x114C7, 0x114C7,
+    0x114D0, 0x114D9,
+    0x11580, 0x115B5,
+    0x115B8, 0x115BE,
+    0x115D8, 0x115DD,
+    0x11600, 0x1163E,
+    0x11640, 0x11640,
+    0x11644, 0x11644,
+    0x11650, 0x11659,
+    0x11680, 0x116B5,
+    0x116B8, 0x116B8,
+    0x116C0, 0x116C9,
+    0x116D0, 0x116E3,
+    0x11700, 0x1171A,
+    0x1171D, 0x1172A,
+    0x11730, 0x11739,
+    0x11740, 0x11746,
+    0x11800, 0x11838,
+    0x118A0, 0x118E9,
+    0x118FF, 0x11906,
+    0x11909, 0x11909,
+    0x1190C, 0x11913,
+    0x11915, 0x11916,
+    0x11918, 0x11935,
+    0x11937, 0x11938,
+    0x1193B, 0x1193C,
+    0x1193F, 0x11942,
+    0x11950, 0x11959,
+    0x119A0, 0x119A7,
+    0x119AA, 0x119D7,
+    0x119DA, 0x119DF,
+    0x119E1, 0x119E1,
+    0x119E3, 0x119E4,
+    0x11A00, 0x11A32,
+    0x11A35, 0x11A3E,
+    0x11A50, 0x11A97,
+    0x11A9D, 0x11A9D,
+    0x11AB0, 0x11AF8,
+    0x11B60, 0x11B67,
+    0x11BC0, 0x11BE0,
+    0x11BF0, 0x11BF9,
+    0x11C00, 0x11C08,
+    0x11C0A, 0x11C36,
+    0x11C38, 0x11C3E,
+    0x11C40, 0x11C40,
+    0x11C50, 0x11C59,
+    0x11C72, 0x11C8F,
+    0x11C92, 0x11CA7,
+    0x11CA9, 0x11CB6,
+    0x11D00, 0x11D06,
+    0x11D08, 0x11D09,
+    0x11D0B, 0x11D36,
+    0x11D3A, 0x11D3A,
+    0x11D3C, 0x11D3D,
+    0x11D3F, 0x11D41,
+    0x11D43, 0x11D43,
+    0x11D46, 0x11D47,
+    0x11D50, 0x11D59,
+    0x11D60, 0x11D65,
+    0x11D67, 0x11D68,
+    0x11D6A, 0x11D8E,
+    0x11D90, 0x11D91,
+    0x11D93, 0x11D96,
+    0x11D98, 0x11D98,
+    0x11DA0, 0x11DA9,
+    0x11DB0, 0x11DDB,
+    0x11DE0, 0x11DE9,
+    0x11EE0, 0x11EF6,
+    0x11F00, 0x11F10,
+    0x11F12, 0x11F3A,
+    0x11F3E, 0x11F40,
+    0x11F50, 0x11F59,
+    0x11FB0, 0x11FB0,
+    0x12000, 0x12399,
+    0x12400, 0x1246E,
+    0x12480, 0x12543,
+    0x12F90, 0x12FF0,
+    0x13000, 0x1342F,
+    0x13441, 0x13446,
+    0x13460, 0x143FA,
+    0x14400, 0x14646,
+    0x16100, 0x1612E,
+    0x16130, 0x16139,
+    0x16800, 0x16A38,
+    0x16A40, 0x16A5E,
+    0x16A60, 0x16A69,
+    0x16A70, 0x16ABE,
+    0x16AC0, 0x16AC9,
+    0x16AD0, 0x16AED,
+    0x16B00, 0x16B2F,
+    0x16B40, 0x16B43,
+    0x16B50, 0x16B59,
+    0x16B63, 0x16B77,
+    0x16B7D, 0x16B8F,
+    0x16D40, 0x16D6C,
+    0x16D70, 0x16D79,
+    0x16E40, 0x16E7F,
+    0x16EA0, 0x16EB8,
+    0x16EBB, 0x16ED3,
+    0x16F00, 0x16F4A,
+    0x16F4F, 0x16F87,
+    0x16F8F, 0x16F9F,
+    0x16FE0, 0x16FE1,
+    0x16FE3, 0x16FE3,
+    0x16FF0, 0x16FF6,
+    0x17000, 0x18CD5,
+    0x18CFF, 0x18D1E,
+    0x18D80, 0x18DF2,
+    0x1AFF0, 0x1AFF3,
+    0x1AFF5, 0x1AFFB,
+    0x1AFFD, 0x1AFFE,
+    0x1B000, 0x1B122,
+    0x1B132, 0x1B132,
+    0x1B150, 0x1B152,
+    0x1B155, 0x1B155,
+    0x1B164, 0x1B167,
+    0x1B170, 0x1B2FB,
+    0x1BC00, 0x1BC6A,
+    0x1BC70, 0x1BC7C,
+    0x1BC80, 0x1BC88,
+    0x1BC90, 0x1BC99,
+    0x1BC9E, 0x1BC9E,
+    0x1CCF0, 0x1CCF9,
+    0x1D400, 0x1D454,
+    0x1D456, 0x1D49C,
+    0x1D49E, 0x1D49F,
+    0x1D4A2, 0x1D4A2,
+    0x1D4A5, 0x1D4A6,
+    0x1D4A9, 0x1D4AC,
+    0x1D4AE, 0x1D4B9,
+    0x1D4BB, 0x1D4BB,
+    0x1D4BD, 0x1D4C3,
+    0x1D4C5, 0x1D505,
+    0x1D507, 0x1D50A,
+    0x1D50D, 0x1D514,
+    0x1D516, 0x1D51C,
+    0x1D51E, 0x1D539,
+    0x1D53B, 0x1D53E,
+    0x1D540, 0x1D544,
+    0x1D546, 0x1D546,
+    0x1D54A, 0x1D550,
+    0x1D552, 0x1D6A5,
+    0x1D6A8, 0x1D6C0,
+    0x1D6C2, 0x1D6DA,
+    0x1D6DC, 0x1D6FA,
+    0x1D6FC, 0x1D714,
+    0x1D716, 0x1D734,
+    0x1D736, 0x1D74E,
+    0x1D750, 0x1D76E,
+    0x1D770, 0x1D788,
+    0x1D78A, 0x1D7A8,
+    0x1D7AA, 0x1D7C2,
+    0x1D7C4, 0x1D7CB,
+    0x1D7CE, 0x1D7FF,
+    0x1DF00, 0x1DF1E,
+    0x1DF25, 0x1DF2A,
+    0x1E000, 0x1E006,
+    0x1E008, 0x1E018,
+    0x1E01B, 0x1E021,
+    0x1E023, 0x1E024,
+    0x1E026, 0x1E02A,
+    0x1E030, 0x1E06D,
+    0x1E08F, 0x1E08F,
+    0x1E100, 0x1E12C,
+    0x1E137, 0x1E13D,
+    0x1E140, 0x1E149,
+    0x1E14E, 0x1E14E,
+    0x1E290, 0x1E2AD,
+    0x1E2C0, 0x1E2EB,
+    0x1E2F0, 0x1E2F9,
+    0x1E4D0, 0x1E4EB,
+    0x1E4F0, 0x1E4F9,
+    0x1E5D0, 0x1E5ED,
+    0x1E5F0, 0x1E5FA,
+    0x1E6C0, 0x1E6DE,
+    0x1E6E0, 0x1E6F5,
+    0x1E6FE, 0x1E6FF,
+    0x1E7E0, 0x1E7E6,
+    0x1E7E8, 0x1E7EB,
+    0x1E7ED, 0x1E7EE,
+    0x1E7F0, 0x1E7FE,
+    0x1E800, 0x1E8C4,
+    0x1E900, 0x1E943,
+    0x1E947, 0x1E947,
+    0x1E94B, 0x1E94B,
+    0x1E950, 0x1E959,
+    0x1EE00, 0x1EE03,
+    0x1EE05, 0x1EE1F,
+    0x1EE21, 0x1EE22,
+    0x1EE24, 0x1EE24,
+    0x1EE27, 0x1EE27,
+    0x1EE29, 0x1EE32,
+    0x1EE34, 0x1EE37,
+    0x1EE39, 0x1EE39,
+    0x1EE3B, 0x1EE3B,
+    0x1EE42, 0x1EE42,
+    0x1EE47, 0x1EE47,
+    0x1EE49, 0x1EE49,
+    0x1EE4B, 0x1EE4B,
+    0x1EE4D, 0x1EE4F,
+    0x1EE51, 0x1EE52,
+    0x1EE54, 0x1EE54,
+    0x1EE57, 0x1EE57,
+    0x1EE59, 0x1EE59,
+    0x1EE5B, 0x1EE5B,
+    0x1EE5D, 0x1EE5D,
+    0x1EE5F, 0x1EE5F,
+    0x1EE61, 0x1EE62,
+    0x1EE64, 0x1EE64,
+    0x1EE67, 0x1EE6A,
+    0x1EE6C, 0x1EE72,
+    0x1EE74, 0x1EE77,
+    0x1EE79, 0x1EE7C,
+    0x1EE7E, 0x1EE7E,
+    0x1EE80, 0x1EE89,
+    0x1EE8B, 0x1EE9B,
+    0x1EEA1, 0x1EEA3,
+    0x1EEA5, 0x1EEA9,
+    0x1EEAB, 0x1EEBB,
+    0x1F130, 0x1F149,
+    0x1F150, 0x1F169,
+    0x1F170, 0x1F189,
+    0x1FBF0, 0x1FBF9,
+    0x20000, 0x2A6DF,
+    0x2A700, 0x2B81D,
+    0x2B820, 0x2CEAD,
+    0x2CEB0, 0x2EBE0,
+    0x2EBF0, 0x2EE5D,
+    0x2F800, 0x2FA1D,
+    0x30000, 0x3134A,
+    0x31350, 0x33479,
+}; // end of unicode_alnum_codepoints
+
+#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1320 // Declared element count of the unicode_isupper_codepoints array that follows.
+static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
+    0x100, 0x100,
+    0x102, 0x102,
+    0x104, 0x104,
+    0x106, 0x106,
+    0x108, 0x108,
+    0x10A, 0x10A,
+    0x10C, 0x10C,
+    0x10E, 0x10E,
+    0x110, 0x110,
+    0x112, 0x112,
+    0x114, 0x114,
+    0x116, 0x116,
+    0x118, 0x118,
+    0x11A, 0x11A,
+    0x11C, 0x11C,
+    0x11E, 0x11E,
+    0x120, 0x120,
+    0x122, 0x122,
+    0x124, 0x124,
+    0x126, 0x126,
+    0x128, 0x128,
+    0x12A, 0x12A,
+    0x12C, 0x12C,
+    0x12E, 0x12E,
+    0x130, 0x130,
+    0x132, 0x132,
+    0x134, 0x134,
+    0x136, 0x136,
+    0x139, 0x139,
+    0x13B, 0x13B,
+    0x13D, 0x13D,
+    0x13F, 0x13F,
+    0x141, 0x141,
+    0x143, 0x143,
+    0x145, 0x145,
+    0x147, 0x147,
+    0x14A, 0x14A,
+    0x14C, 0x14C,
+    0x14E, 0x14E,
+    0x150, 0x150,
+    0x152, 0x152,
+    0x154, 0x154,
+    0x156, 0x156,
+    0x158, 0x158,
+    0x15A, 0x15A,
+    0x15C, 0x15C,
+    0x15E, 0x15E,
+    0x160, 0x160,
+    0x162, 0x162,
+    0x164, 0x164,
+    0x166, 0x166,
+    0x168, 0x168,
+    0x16A, 0x16A,
+    0x16C, 0x16C,
+    0x16E, 0x16E,
+    0x170, 0x170,
+    0x172, 0x172,
+    0x174, 0x174,
+    0x176, 0x176,
+    0x178, 0x179,
+    0x17B, 0x17B,
+    0x17D, 0x17D,
+    0x181, 0x182,
+    0x184, 0x184,
+    0x186, 0x187,
+    0x189, 0x18B,
+    0x18E, 0x191,
+    0x193, 0x194,
+    0x196, 0x198,
+    0x19C, 0x19D,
+    0x19F, 0x1A0,
+    0x1A2, 0x1A2,
+    0x1A4, 0x1A4,
+    0x1A6, 0x1A7,
+    0x1A9, 0x1A9,
+    0x1AC, 0x1AC,
+    0x1AE, 0x1AF,
+    0x1B1, 0x1B3,
+    0x1B5, 0x1B5,
+    0x1B7, 0x1B8,
+    0x1BC, 0x1BC,
+    0x1C4, 0x1C5,
+    0x1C7, 0x1C8,
+    0x1CA, 0x1CB,
+    0x1CD, 0x1CD,
+    0x1CF, 0x1CF,
+    0x1D1, 0x1D1,
+    0x1D3, 0x1D3,
+    0x1D5, 0x1D5,
+    0x1D7, 0x1D7,
+    0x1D9, 0x1D9,
+    0x1DB, 0x1DB,
+    0x1DE, 0x1DE,
+    0x1E0, 0x1E0,
+    0x1E2, 0x1E2,
+    0x1E4, 0x1E4,
+    0x1E6, 0x1E6,
+    0x1E8, 0x1E8,
+    0x1EA, 0x1EA,
+    0x1EC, 0x1EC,
+    0x1EE, 0x1EE,
+    0x1F1, 0x1F2,
+    0x1F4, 0x1F4,
+    0x1F6, 0x1F8,
+    0x1FA, 0x1FA,
+    0x1FC, 0x1FC,
+    0x1FE, 0x1FE,
+    0x200, 0x200,
+    0x202, 0x202,
+    0x204, 0x204,
+    0x206, 0x206,
+    0x208, 0x208,
+    0x20A, 0x20A,
+    0x20C, 0x20C,
+    0x20E, 0x20E,
+    0x210, 0x210,
+    0x212, 0x212,
+    0x214, 0x214,
+    0x216, 0x216,
+    0x218, 0x218,
+    0x21A, 0x21A,
+    0x21C, 0x21C,
+    0x21E, 0x21E,
+    0x220, 0x220,
+    0x222, 0x222,
+    0x224, 0x224,
+    0x226, 0x226,
+    0x228, 0x228,
+    0x22A, 0x22A,
+    0x22C, 0x22C,
+    0x22E, 0x22E,
+    0x230, 0x230,
+    0x232, 0x232,
+    0x23A, 0x23B,
+    0x23D, 0x23E,
+    0x241, 0x241,
+    0x243, 0x246,
+    0x248, 0x248,
+    0x24A, 0x24A,
+    0x24C, 0x24C,
+    0x24E, 0x24E,
+    0x370, 0x370,
+    0x372, 0x372,
+    0x376, 0x376,
+    0x37F, 0x37F,
+    0x386, 0x386,
+    0x388, 0x38A,
+    0x38C, 0x38C,
+    0x38E, 0x38F,
+    0x391, 0x3A1,
+    0x3A3, 0x3AB,
+    0x3CF, 0x3CF,
+    0x3D2, 0x3D4,
+    0x3D8, 0x3D8,
+    0x3DA, 0x3DA,
+    0x3DC, 0x3DC,
+    0x3DE, 0x3DE,
+    0x3E0, 0x3E0,
+    0x3E2, 0x3E2,
+    0x3E4, 0x3E4,
+    0x3E6, 0x3E6,
+    0x3E8, 0x3E8,
+    0x3EA, 0x3EA,
+    0x3EC, 0x3EC,
+    0x3EE, 0x3EE,
+    0x3F4, 0x3F4,
+    0x3F7, 0x3F7,
+    0x3F9, 0x3FA,
+    0x3FD, 0x42F,
+    0x460, 0x460,
+    0x462, 0x462,
+    0x464, 0x464,
+    0x466, 0x466,
+    0x468, 0x468,
+    0x46A, 0x46A,
+    0x46C, 0x46C,
+    0x46E, 0x46E,
+    0x470, 0x470,
+    0x472, 0x472,
+    0x474, 0x474,
+    0x476, 0x476,
+    0x478, 0x478,
+    0x47A, 0x47A,
+    0x47C, 0x47C,
+    0x47E, 0x47E,
+    0x480, 0x480,
+    0x48A, 0x48A,
+    0x48C, 0x48C,
+    0x48E, 0x48E,
+    0x490, 0x490,
+    0x492, 0x492,
+    0x494, 0x494,
+    0x496, 0x496,
+    0x498, 0x498,
+    0x49A, 0x49A,
+    0x49C, 0x49C,
+    0x49E, 0x49E,
+    0x4A0, 0x4A0,
+    0x4A2, 0x4A2,
+    0x4A4, 0x4A4,
+    0x4A6, 0x4A6,
+    0x4A8, 0x4A8,
+    0x4AA, 0x4AA,
+    0x4AC, 0x4AC,
+    0x4AE, 0x4AE,
+    0x4B0, 0x4B0,
+    0x4B2, 0x4B2,
+    0x4B4, 0x4B4,
+    0x4B6, 0x4B6,
+    0x4B8, 0x4B8,
+    0x4BA, 0x4BA,
+    0x4BC, 0x4BC,
+    0x4BE, 0x4BE,
+    0x4C0, 0x4C1,
+    0x4C3, 0x4C3,
+    0x4C5, 0x4C5,
+    0x4C7, 0x4C7,
+    0x4C9, 0x4C9,
+    0x4CB, 0x4CB,
+    0x4CD, 0x4CD,
+    0x4D0, 0x4D0,
+    0x4D2, 0x4D2,
+    0x4D4, 0x4D4,
+    0x4D6, 0x4D6,
+    0x4D8, 0x4D8,
+    0x4DA, 0x4DA,
+    0x4DC, 0x4DC,
+    0x4DE, 0x4DE,
+    0x4E0, 0x4E0,
+    0x4E2, 0x4E2,
+    0x4E4, 0x4E4,
+    0x4E6, 0x4E6,
+    0x4E8, 0x4E8,
+    0x4EA, 0x4EA,
+    0x4EC, 0x4EC,
+    0x4EE, 0x4EE,
+    0x4F0, 0x4F0,
+    0x4F2, 0x4F2,
+    0x4F4, 0x4F4,
+    0x4F6, 0x4F6,
+    0x4F8, 0x4F8,
+    0x4FA, 0x4FA,
+    0x4FC, 0x4FC,
+    0x4FE, 0x4FE,
+    0x500, 0x500,
+    0x502, 0x502,
+    0x504, 0x504,
+    0x506, 0x506,
+    0x508, 0x508,
+    0x50A, 0x50A,
+    0x50C, 0x50C,
+    0x50E, 0x50E,
+    0x510, 0x510,
+    0x512, 0x512,
+    0x514, 0x514,
+    0x516, 0x516,
+    0x518, 0x518,
+    0x51A, 0x51A,
+    0x51C, 0x51C,
+    0x51E, 0x51E,
+    0x520, 0x520,
+    0x522, 0x522,
+    0x524, 0x524,
+    0x526, 0x526,
+    0x528, 0x528,
+    0x52A, 0x52A,
+    0x52C, 0x52C,
+    0x52E, 0x52E,
+    0x531, 0x556,
+    0x10A0, 0x10C5,
+    0x10C7, 0x10C7,
+    0x10CD, 0x10CD,
+    0x13A0, 0x13F5,
+    0x1C89, 0x1C89,
+    0x1C90, 0x1CBA,
+    0x1CBD, 0x1CBF,
+    0x1E00, 0x1E00,
+    0x1E02, 0x1E02,
+    0x1E04, 0x1E04,
+    0x1E06, 0x1E06,
+    0x1E08, 0x1E08,
+    0x1E0A, 0x1E0A,
+    0x1E0C, 0x1E0C,
+    0x1E0E, 0x1E0E,
+    0x1E10, 0x1E10,
+    0x1E12, 0x1E12,
+    0x1E14, 0x1E14,
+    0x1E16, 0x1E16,
+    0x1E18, 0x1E18,
+    0x1E1A, 0x1E1A,
+    0x1E1C, 0x1E1C,
+    0x1E1E, 0x1E1E,
+    0x1E20, 0x1E20,
+    0x1E22, 0x1E22,
+    0x1E24, 0x1E24,
+    0x1E26, 0x1E26,
+    0x1E28, 0x1E28,
+    0x1E2A, 0x1E2A,
+    0x1E2C, 0x1E2C,
+    0x1E2E, 0x1E2E,
+    0x1E30, 0x1E30,
+    0x1E32, 0x1E32,
+    0x1E34, 0x1E34,
+    0x1E36, 0x1E36,
+    0x1E38, 0x1E38,
+    0x1E3A, 0x1E3A,
+    0x1E3C, 0x1E3C,
+    0x1E3E, 0x1E3E,
+    0x1E40, 0x1E40,
+    0x1E42, 0x1E42,
+    0x1E44, 0x1E44,
+    0x1E46, 0x1E46,
+    0x1E48, 0x1E48,
+    0x1E4A, 0x1E4A,
+    0x1E4C, 0x1E4C,
+    0x1E4E, 0x1E4E,
+    0x1E50, 0x1E50,
+    0x1E52, 0x1E52,
+    0x1E54, 0x1E54,
+    0x1E56, 0x1E56,
+    0x1E58, 0x1E58,
+    0x1E5A, 0x1E5A,
+    0x1E5C, 0x1E5C,
+    0x1E5E, 0x1E5E,
+    0x1E60, 0x1E60,
+    0x1E62, 0x1E62,
+    0x1E64, 0x1E64,
+    0x1E66, 0x1E66,
+    0x1E68, 0x1E68,
+    0x1E6A, 0x1E6A,
+    0x1E6C, 0x1E6C,
+    0x1E6E, 0x1E6E,
+    0x1E70, 0x1E70,
+    0x1E72, 0x1E72,
+    0x1E74, 0x1E74,
+    0x1E76, 0x1E76,
+    0x1E78, 0x1E78,
+    0x1E7A, 0x1E7A,
+    0x1E7C, 0x1E7C,
+    0x1E7E, 0x1E7E,
+    0x1E80, 0x1E80,
+    0x1E82, 0x1E82,
+    0x1E84, 0x1E84,
+    0x1E86, 0x1E86,
+    0x1E88, 0x1E88,
+    0x1E8A, 0x1E8A,
+    0x1E8C, 0x1E8C,
+    0x1E8E, 0x1E8E,
+    0x1E90, 0x1E90,
+    0x1E92, 0x1E92,
+    0x1E94, 0x1E94,
+    0x1E9E, 0x1E9E,
+    0x1EA0, 0x1EA0,
+    0x1EA2, 0x1EA2,
+    0x1EA4, 0x1EA4,
+    0x1EA6, 0x1EA6,
+    0x1EA8, 0x1EA8,
+    0x1EAA, 0x1EAA,
+    0x1EAC, 0x1EAC,
+    0x1EAE, 0x1EAE,
+    0x1EB0, 0x1EB0,
+    0x1EB2, 0x1EB2,
+    0x1EB4, 0x1EB4,
+    0x1EB6, 0x1EB6,
+    0x1EB8, 0x1EB8,
+    0x1EBA, 0x1EBA,
+    0x1EBC, 0x1EBC,
+    0x1EBE, 0x1EBE,
+    0x1EC0, 0x1EC0,
+    0x1EC2, 0x1EC2,
+    0x1EC4, 0x1EC4,
+    0x1EC6, 0x1EC6,
+    0x1EC8, 0x1EC8,
+    0x1ECA, 0x1ECA,
+    0x1ECC, 0x1ECC,
+    0x1ECE, 0x1ECE,
+    0x1ED0, 0x1ED0,
+    0x1ED2, 0x1ED2,
+    0x1ED4, 0x1ED4,
+    0x1ED6, 0x1ED6,
+    0x1ED8, 0x1ED8,
+    0x1EDA, 0x1EDA,
+    0x1EDC, 0x1EDC,
+    0x1EDE, 0x1EDE,
+    0x1EE0, 0x1EE0,
+    0x1EE2, 0x1EE2,
+    0x1EE4, 0x1EE4,
+    0x1EE6, 0x1EE6,
+    0x1EE8, 0x1EE8,
+    0x1EEA, 0x1EEA,
+    0x1EEC, 0x1EEC,
+    0x1EEE, 0x1EEE,
+    0x1EF0, 0x1EF0,
+    0x1EF2, 0x1EF2,
+    0x1EF4, 0x1EF4,
+    0x1EF6, 0x1EF6,
+    0x1EF8, 0x1EF8,
+    0x1EFA, 0x1EFA,
+    0x1EFC, 0x1EFC,
+    0x1EFE, 0x1EFE,
+    0x1F08, 0x1F0F,
+    0x1F18, 0x1F1D,
+    0x1F28, 0x1F2F,
+    0x1F38, 0x1F3F,
+    0x1F48, 0x1F4D,
+    0x1F59, 0x1F59,
+    0x1F5B, 0x1F5B,
+    0x1F5D, 0x1F5D,
+    0x1F5F, 0x1F5F,
+    0x1F68, 0x1F6F,
+    0x1F88, 0x1F8F,
+    0x1F98, 0x1F9F,
+    0x1FA8, 0x1FAF,
+    0x1FB8, 0x1FBC,
+    0x1FC8, 0x1FCC,
+    0x1FD8, 0x1FDB,
+    0x1FE8, 0x1FEC,
+    0x1FF8, 0x1FFC,
+    0x2102, 0x2102,
+    0x2107, 0x2107,
+    0x210B, 0x210D,
+    0x2110, 0x2112,
+    0x2115, 0x2115,
+    0x2119, 0x211D,
+    0x2124, 0x2124,
+    0x2126, 0x2126,
+    0x2128, 0x2128,
+    0x212A, 0x212D,
+    0x2130, 0x2133,
+    0x213E, 0x213F,
+    0x2145, 0x2145,
+    0x2160, 0x216F,
+    0x2183, 0x2183,
+    0x24B6, 0x24CF,
+    0x2C00, 0x2C2F,
+    0x2C60, 0x2C60,
+    0x2C62, 0x2C64,
+    0x2C67, 0x2C67,
+    0x2C69, 0x2C69,
+    0x2C6B, 0x2C6B,
+    0x2C6D, 0x2C70,
+    0x2C72, 0x2C72,
+    0x2C75, 0x2C75,
+    0x2C7E, 0x2C80,
+    0x2C82, 0x2C82,
+    0x2C84, 0x2C84,
+    0x2C86, 0x2C86,
+    0x2C88, 0x2C88,
+    0x2C8A, 0x2C8A,
+    0x2C8C, 0x2C8C,
+    0x2C8E, 0x2C8E,
+    0x2C90, 0x2C90,
+    0x2C92, 0x2C92,
+    0x2C94, 0x2C94,
+    0x2C96, 0x2C96,
+    0x2C98, 0x2C98,
+    0x2C9A, 0x2C9A,
+    0x2C9C, 0x2C9C,
+    0x2C9E, 0x2C9E,
+    0x2CA0, 0x2CA0,
+    0x2CA2, 0x2CA2,
+    0x2CA4, 0x2CA4,
+    0x2CA6, 0x2CA6,
+    0x2CA8, 0x2CA8,
+    0x2CAA, 0x2CAA,
+    0x2CAC, 0x2CAC,
+    0x2CAE, 0x2CAE,
+    0x2CB0, 0x2CB0,
+    0x2CB2, 0x2CB2,
+    0x2CB4, 0x2CB4,
+    0x2CB6, 0x2CB6,
+    0x2CB8, 0x2CB8,
+    0x2CBA, 0x2CBA,
+    0x2CBC, 0x2CBC,
+    0x2CBE, 0x2CBE,
+    0x2CC0, 0x2CC0,
+    0x2CC2, 0x2CC2,
+    0x2CC4, 0x2CC4,
+    0x2CC6, 0x2CC6,
+    0x2CC8, 0x2CC8,
+    0x2CCA, 0x2CCA,
+    0x2CCC, 0x2CCC,
+    0x2CCE, 0x2CCE,
+    0x2CD0, 0x2CD0,
+    0x2CD2, 0x2CD2,
+    0x2CD4, 0x2CD4,
+    0x2CD6, 0x2CD6,
+    0x2CD8, 0x2CD8,
+    0x2CDA, 0x2CDA,
+    0x2CDC, 0x2CDC,
+    0x2CDE, 0x2CDE,
+    0x2CE0, 0x2CE0,
+    0x2CE2, 0x2CE2,
+    0x2CEB, 0x2CEB,
+    0x2CED, 0x2CED,
+    0x2CF2, 0x2CF2,
+    0xA640, 0xA640,
+    0xA642, 0xA642,
+    0xA644, 0xA644,
+    0xA646, 0xA646,
+    0xA648, 0xA648,
+    0xA64A, 0xA64A,
+    0xA64C, 0xA64C,
+    0xA64E, 0xA64E,
+    0xA650, 0xA650,
+    0xA652, 0xA652,
+    0xA654, 0xA654,
+    0xA656, 0xA656,
+    0xA658, 0xA658,
+    0xA65A, 0xA65A,
+    0xA65C, 0xA65C,
+    0xA65E, 0xA65E,
+    0xA660, 0xA660,
+    0xA662, 0xA662,
+    0xA664, 0xA664,
+    0xA666, 0xA666,
+    0xA668, 0xA668,
+    0xA66A, 0xA66A,
+    0xA66C, 0xA66C,
+    0xA680, 0xA680,
+    0xA682, 0xA682,
+    0xA684, 0xA684,
+    0xA686, 0xA686,
+    0xA688, 0xA688,
+    0xA68A, 0xA68A,
+    0xA68C, 0xA68C,
+    0xA68E, 0xA68E,
+    0xA690, 0xA690,
+    0xA692, 0xA692,
+    0xA694, 0xA694,
+    0xA696, 0xA696,
+    0xA698, 0xA698,
+    0xA69A, 0xA69A,
+    0xA722, 0xA722,
+    0xA724, 0xA724,
+    0xA726, 0xA726,
+    0xA728, 0xA728,
+    0xA72A, 0xA72A,
+    0xA72C, 0xA72C,
+    0xA72E, 0xA72E,
+    0xA732, 0xA732,
+    0xA734, 0xA734,
+    0xA736, 0xA736,
+    0xA738, 0xA738,
+    0xA73A, 0xA73A,
+    0xA73C, 0xA73C,
+    0xA73E, 0xA73E,
+    0xA740, 0xA740,
+    0xA742, 0xA742,
+    0xA744, 0xA744,
+    0xA746, 0xA746,
+    0xA748, 0xA748,
+    0xA74A, 0xA74A,
+    0xA74C, 0xA74C,
+    0xA74E, 0xA74E,
+    0xA750, 0xA750,
+    0xA752, 0xA752,
+    0xA754, 0xA754,
+    0xA756, 0xA756,
+    0xA758, 0xA758,
+    0xA75A, 0xA75A,
+    0xA75C, 0xA75C,
+    0xA75E, 0xA75E,
+    0xA760, 0xA760,
+    0xA762, 0xA762,
+    0xA764, 0xA764,
+    0xA766, 0xA766,
+    0xA768, 0xA768,
+    0xA76A, 0xA76A,
+    0xA76C, 0xA76C,
+    0xA76E, 0xA76E,
+    0xA779, 0xA779,
+    0xA77B, 0xA77B,
+    0xA77D, 0xA77E,
+    0xA780, 0xA780,
+    0xA782, 0xA782,
+    0xA784, 0xA784,
+    0xA786, 0xA786,
+    0xA78B, 0xA78B,
+    0xA78D, 0xA78D,
+    0xA790, 0xA790,
+    0xA792, 0xA792,
+    0xA796, 0xA796,
+    0xA798, 0xA798,
+    0xA79A, 0xA79A,
+    0xA79C, 0xA79C,
+    0xA79E, 0xA79E,
+    0xA7A0, 0xA7A0,
+    0xA7A2, 0xA7A2,
+    0xA7A4, 0xA7A4,
+    0xA7A6, 0xA7A6,
+    0xA7A8, 0xA7A8,
+    0xA7AA, 0xA7AE,
+    0xA7B0, 0xA7B4,
+    0xA7B6, 0xA7B6,
+    0xA7B8, 0xA7B8,
+    0xA7BA, 0xA7BA,
+    0xA7BC, 0xA7BC,
+    0xA7BE, 0xA7BE,
+    0xA7C0, 0xA7C0,
+    0xA7C2, 0xA7C2,
+    0xA7C4, 0xA7C7,
+    0xA7C9, 0xA7C9,
+    0xA7CB, 0xA7CC,
+    0xA7CE, 0xA7CE,
+    0xA7D0, 0xA7D0,
+    0xA7D2, 0xA7D2,
+    0xA7D4, 0xA7D4,
+    0xA7D6, 0xA7D6,
+    0xA7D8, 0xA7D8,
+    0xA7DA, 0xA7DA,
+    0xA7DC, 0xA7DC,
+    0xA7F5, 0xA7F5,
+    0xFF21, 0xFF3A,
+    0x10400, 0x10427,
+    0x104B0, 0x104D3,
+    0x10570, 0x1057A,
+    0x1057C, 0x1058A,
+    0x1058C, 0x10592,
+    0x10594, 0x10595,
+    0x10C80, 0x10CB2,
+    0x10D50, 0x10D65,
+    0x118A0, 0x118BF,
+    0x16E40, 0x16E5F,
+    0x16EA0, 0x16EB8,
+    0x1D400, 0x1D419,
+    0x1D434, 0x1D44D,
+    0x1D468, 0x1D481,
+    0x1D49C, 0x1D49C,
+    0x1D49E, 0x1D49F,
+    0x1D4A2, 0x1D4A2,
+    0x1D4A5, 0x1D4A6,
+    0x1D4A9, 0x1D4AC,
+    0x1D4AE, 0x1D4B5,
+    0x1D4D0, 0x1D4E9,
+    0x1D504, 0x1D505,
+    0x1D507, 0x1D50A,
+    0x1D50D, 0x1D514,
+    0x1D516, 0x1D51C,
+    0x1D538, 0x1D539,
+    0x1D53B, 0x1D53E,
+    0x1D540, 0x1D544,
+    0x1D546, 0x1D546,
+    0x1D54A, 0x1D550,
+    0x1D56C, 0x1D585,
+    0x1D5A0, 0x1D5B9,
+    0x1D5D4, 0x1D5ED,
+    0x1D608, 0x1D621,
+    0x1D63C, 0x1D655,
+    0x1D670, 0x1D689,
+    0x1D6A8, 0x1D6C0,
+    0x1D6E2, 0x1D6FA,
+    0x1D71C, 0x1D734,
+    0x1D756, 0x1D76E,
+    0x1D790, 0x1D7A8,
+    0x1D7CA, 0x1D7CA,
+    0x1E900, 0x1E921,
+    0x1F130, 0x1F149,
+    0x1F150, 0x1F169,
+    0x1F170, 0x1F189,
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding unicode codepoint. Note that
+ * this table is different from other encodings where we used a lookup table
+ * because the indices of those tables are the byte representations, not the
+ * codepoints themselves.
+ *
+ * The table covers codepoints U+0000 through U+00FF only; the UTF-8 and CESU-8
+ * predicates in this file fall back to the codepoint range lists for anything
+ * larger.
+ *
+ * NOTE(review): entries are tested against the PRISM_ENCODING_ALPHABETIC_BIT,
+ * PRISM_ENCODING_ALPHANUMERIC_BIT and PRISM_ENCODING_UPPERCASE_BIT masks.
+ * Judging by the values used here (2 for digits, 3 for lowercase letters, 7 for
+ * uppercase letters) these are presumably bits 0, 1 and 2 respectively --
+ * confirm against the macro definitions.
+ */
+const uint8_t pm_encoding_unicode_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Binary search a table of inclusive codepoint ranges for the given codepoint.
+ * The table is laid out as consecutive (first, last) pairs, so size counts the
+ * individual entries rather than the pairs. Returns true when some pair
+ * contains the codepoint and false otherwise.
+ */
+static bool
+pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
+    size_t lower = 0;
+    size_t upper = size;
+
+    while (lower < upper) {
+        // Snap the midpoint down to an even index so that it always lands on
+        // the first element of a pair.
+        const size_t probe = (lower + (upper - lower) / 2) & ~((size_t) 1);
+        const pm_unicode_codepoint_t *range = codepoints + probe;
+
+        if (codepoint < range[0]) {
+            // Keep searching among the pairs before this one.
+            upper = probe;
+        } else if (codepoint > range[1]) {
+            // Keep searching among the pairs after this one.
+            lower = probe + 2;
+        } else {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * A state transition table for decoding UTF-8.
+ *
+ * The first 256 entries map each possible byte value to a character class. The
+ * entries from index 256 onward hold the transitions: the decoders in this
+ * file read the next state from index 256 + (state * 16) + class, start in
+ * state 0, and treat a return to state 0 as the end of a complete character.
+ *
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+static const uint8_t pm_utf_8_dfa[] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+    0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+    0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+    0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+    1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+    1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+    1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+
+/**
+ * Decode the UTF-8 codepoint that starts at b, looking at no more than n bytes
+ * (and never more than 4). On success the codepoint is returned and *width is
+ * set to the number of bytes it occupies. If no complete character is found
+ * within the bytes examined, *width is set to 0 and 0 is returned.
+ */
+static pm_unicode_codepoint_t
+pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
+    assert(n >= 0);
+
+    const size_t limit = (n < 4) ? ((size_t) n) : 4;
+    uint32_t state = 0;
+    uint32_t value = 0;
+    size_t consumed = 0;
+
+    while (consumed < limit) {
+        const uint32_t byte = b[consumed];
+        const uint32_t type = pm_utf_8_dfa[byte];
+        consumed++;
+
+        if (state == 0) {
+            // Leading byte: the class doubles as the number of high bits to
+            // mask away.
+            value = (0xffu >> type) & (byte);
+        } else {
+            // Subsequent byte: append its low six bits.
+            value = (byte & 0x3fu) | (value << 6);
+        }
+
+        state = pm_utf_8_dfa[256 + (state * 16) + type];
+        if (state == 0) {
+            *width = consumed;
+            return (pm_unicode_codepoint_t) value;
+        }
+    }
+
+    *width = 0;
+    return 0;
+}
+
+/**
+ * Return the number of bytes occupied by the UTF-8 character starting at b,
+ * examining at most n bytes (and never more than 4). Returns 0 when no
+ * complete character is found within the bytes examined.
+ */
+size_t
+pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
+    assert(n >= 0);
+
+    const size_t limit = (n < 4) ? ((size_t) n) : 4;
+    uint32_t state = 0;
+    size_t consumed = 0;
+
+    while (consumed < limit) {
+        const uint8_t type = pm_utf_8_dfa[b[consumed]];
+        consumed++;
+
+        // Returning to state 0 means a full character has been read.
+        state = pm_utf_8_dfa[256 + (state * 16) + type];
+        if (state == 0) {
+            return consumed;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * If the next character in the UTF-8 encoding is alphabetical, return the
+ * number of bytes it occupies. Otherwise (including when n is 0) return 0.
+ */
+size_t
+pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
+    // Single-byte characters are answered straight from the lookup table.
+    const uint8_t first = *b;
+    if (first < 0x80) {
+        if (pm_encoding_unicode_table[first] & PRISM_ENCODING_ALPHABETIC_BIT) {
+            return 1;
+        }
+        return 0;
+    }
+
+    size_t width;
+    const pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
+
+    // The lookup table only reaches 0xFF; larger codepoints are searched for
+    // in the range list.
+    bool alphabetic;
+    if (codepoint > 0xFF) {
+        alphabetic = pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH);
+    } else {
+        alphabetic = (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) != 0;
+    }
+
+    return alphabetic ? width : 0;
+}
+
+/**
+ * If the next character in the UTF-8 encoding is alphanumeric, return the
+ * number of bytes it occupies. Otherwise (including when n is 0) return 0.
+ */
+size_t
+pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
+    // Single-byte characters are answered straight from the lookup table.
+    const uint8_t first = *b;
+    if (first < 0x80) {
+        if (pm_encoding_unicode_table[first] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) {
+            return 1;
+        }
+        return 0;
+    }
+
+    size_t width;
+    const pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
+
+    // The lookup table only reaches 0xFF; larger codepoints are searched for
+    // in the range list.
+    bool alphanumeric;
+    if (codepoint > 0xFF) {
+        alphanumeric = pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH);
+    } else {
+        alphanumeric = (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) != 0;
+    }
+
+    return alphanumeric ? width : 0;
+}
+
+/**
+ * Return true if the next character in the UTF-8 encoding is an uppercase
+ * character, and false otherwise (including when n is 0).
+ */
+bool
+pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return false;
+    }
+
+    // Single-byte characters are answered straight from the lookup table.
+    const uint8_t first = *b;
+    if (first < 0x80) {
+        return (pm_encoding_unicode_table[first] & PRISM_ENCODING_UPPERCASE_BIT) != 0;
+    }
+
+    size_t width;
+    const pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
+
+    // The lookup table only reaches 0xFF; larger codepoints are searched for
+    // in the range list.
+    if (codepoint > 0xFF) {
+        return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH);
+    }
+
+    return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) != 0;
+}
+
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+
+/**
+ * Decode the CESU-8 codepoint that starts at b, looking at no more than n
+ * bytes. On success the codepoint is returned and *width is set to the number
+ * of bytes consumed (1, 2, 3 or 6). When none of the recognised forms match,
+ * *width is set to 0 and 0 is returned.
+ *
+ * The forms are tried in a fixed order: single byte, two bytes, a six byte
+ * surrogate pair, a three byte sequence led by 0xED whose second byte is in
+ * 0xA0..0xBF, and finally the general three byte form.
+ *
+ * NOTE(review): the three byte 0xED form does not range-check its third byte,
+ * unlike the other multi-byte forms -- confirm this is intended.
+ */
+static pm_unicode_codepoint_t
+pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
+    // Every form needs at least one byte.
+    if (n <= 0) {
+        *width = 0;
+        return 0;
+    }
+
+    const uint8_t lead = b[0];
+
+    // 0xxxxxxx
+    if (lead < 0x80) {
+        *width = 1;
+        return (pm_unicode_codepoint_t) lead;
+    }
+
+    // 110xxxxx 10xxxxxx
+    if (n >= 2 && lead >= 0xC2 && lead <= 0xDF && (b[1] & 0xC0) == 0x80) {
+        *width = 2;
+        return (pm_unicode_codepoint_t) (((lead & 0x1F) << 6) | (b[1] & 0x3F));
+    }
+
+    if (lead == 0xED) {
+        // 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
+        if (
+            n >= 6 &&
+            (b[1] & 0xF0) == 0xA0 &&
+            (b[2] & 0xC0) == 0x80 &&
+            b[3] == 0xED &&
+            (b[4] & 0xF0) == 0xB0 &&
+            (b[5] & 0xC0) == 0x80
+        ) {
+            *width = 6;
+            return (pm_unicode_codepoint_t) (0x10000 + (((b[1] & 0xF) << 16) | ((b[2] & 0x3F) << 10) | ((b[4] & 0xF) << 6) | (b[5] & 0x3F)));
+        }
+
+        // 11101101 101xxxxx followed by one more byte
+        if (n >= 3 && (b[1] & 0xE0) == 0xA0) {
+            *width = 3;
+            return (pm_unicode_codepoint_t) (0x10000 + (((lead & 0x03) << 16) | ((b[1] & 0x3F) << 10) | (b[2] & 0x3F)));
+        }
+    }
+
+    // 1110xxxx 10xxxxxx 10xxxxxx
+    if (n >= 3) {
+        const bool second_in_range =
+            ((lead == 0xE0 && b[1] >= 0xA0) || (lead >= 0xE1 && lead <= 0xEF && b[1] >= 0x80)) &&
+            b[1] <= 0xBF;
+
+        if (second_in_range && (b[2] & 0xC0) == 0x80) {
+            *width = 3;
+            return (pm_unicode_codepoint_t) (((lead & 0xF) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F));
+        }
+    }
+
+    *width = 0;
+    return 0;
+}
+
+/**
+ * Return the number of bytes occupied by the CESU-8 character starting at b,
+ * or 0 when n is 0 or the bytes do not decode.
+ */
+static size_t
+pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
+    size_t width = 0;
+
+    // Only the width matters here; the decoded codepoint is discarded.
+    if (n != 0) {
+        pm_cesu_8_codepoint(b, n, &width);
+    }
+
+    return width;
+}
+
+/**
+ * If the next character in the CESU-8 encoding is alphabetical, return the
+ * number of bytes it occupies. Otherwise (including when n is 0) return 0.
+ */
+static size_t
+pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
+    // Single-byte characters are answered straight from the lookup table.
+    const uint8_t first = *b;
+    if (first < 0x80) {
+        if (pm_encoding_unicode_table[first] & PRISM_ENCODING_ALPHABETIC_BIT) {
+            return 1;
+        }
+        return 0;
+    }
+
+    size_t width;
+    const pm_unicode_codepoint_t codepoint = pm_cesu_8_codepoint(b, n, &width);
+
+    // The lookup table only reaches 0xFF; larger codepoints are searched for
+    // in the range list.
+    bool alphabetic;
+    if (codepoint > 0xFF) {
+        alphabetic = pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH);
+    } else {
+        alphabetic = (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) != 0;
+    }
+
+    return alphabetic ? width : 0;
+}
+
+/**
+ * If the next character in the CESU-8 encoding is alphanumeric, return the
+ * number of bytes it occupies. Otherwise (including when n is 0) return 0.
+ */
+static size_t
+pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return 0;
+    }
+
+    // Single-byte characters are answered straight from the lookup table.
+    const uint8_t first = *b;
+    if (first < 0x80) {
+        if (pm_encoding_unicode_table[first] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) {
+            return 1;
+        }
+        return 0;
+    }
+
+    size_t width;
+    const pm_unicode_codepoint_t codepoint = pm_cesu_8_codepoint(b, n, &width);
+
+    // The lookup table only reaches 0xFF; larger codepoints are searched for
+    // in the range list.
+    bool alphanumeric;
+    if (codepoint > 0xFF) {
+        alphanumeric = pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH);
+    } else {
+        alphanumeric = (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) != 0;
+    }
+
+    return alphanumeric ? width : 0;
+}
+
+/**
+ * Return true if the next character in the CESU-8 encoding is an uppercase
+ * character, and false otherwise (including when n is 0).
+ */
+static bool
+pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (n == 0) {
+        return false;
+    }
+
+    // Single-byte characters are answered straight from the lookup table.
+    const uint8_t first = *b;
+    if (first < 0x80) {
+        return (pm_encoding_unicode_table[first] & PRISM_ENCODING_UPPERCASE_BIT) != 0;
+    }
+
+    size_t width;
+    const pm_unicode_codepoint_t codepoint = pm_cesu_8_codepoint(b, n, &width);
+
+    // The lookup table only reaches 0xFF; larger codepoints are searched for
+    // in the range list.
+    if (codepoint > 0xFF) {
+        return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH);
+    }
+
+    return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) != 0;
+}
+
+#endif
+
+#undef UNICODE_ALPHA_CODEPOINTS_LENGTH
+#undef UNICODE_ALNUM_CODEPOINTS_LENGTH
+#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding US-ASCII character.
+ *
+ * The table is indexed by byte value. Only the digits (2), uppercase letters
+ * (7) and lowercase letters (3) have a non-zero entry; every entry for bytes
+ * 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_ascii_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding CP850 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII CP850 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_cp850_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding CP852 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII CP852 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_cp852_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding CP855 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII CP855 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_cp855_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding GB1988 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII GB1988 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_gb1988_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM437 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII IBM437 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_ibm437_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM720 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII IBM720 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_ibm720_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM737 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII IBM737 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_ibm737_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM775 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII IBM775 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_ibm775_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM852 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII IBM852 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_ibm852_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Each element of the following table contains a bitfield that indicates a
+ * piece of information about the corresponding IBM855 character.
+ *
+ * NOTE(review): every entry for bytes 0x80-0xFF is 0, so the contents match
+ * pm_encoding_ascii_table and no non-ASCII IBM855 character is flagged here --
+ * confirm this is intended.
+ */
+static const uint8_t pm_encoding_ibm855_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM857, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm857_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM860, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm860_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM861, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm861_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM862, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm862_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM863, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm863_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM864, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm864_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM865, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm865_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM866, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm866_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for IBM869, indexed by the byte's value.
+ * Only ASCII 0-9 (2), A-Z (7) and a-z (3) are nonzero; 0x80-0xFF are all 0.
+ */
+static const uint8_t pm_encoding_ibm869_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-1, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_1_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-2, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_2_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 0, 7, 0, 7, 7, 0, 0, 7, 7, 7, 7, 0, 7, 7, // Ax
+    0, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 0, 3, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-3, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_3_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 0, 0, 0, 0, 7, 0, 0, 7, 7, 7, 7, 0, 0, 7, // Ax
+    0, 3, 0, 0, 0, 3, 3, 0, 0, 3, 3, 3, 3, 0, 0, 3, // Bx
+    7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    0, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-4, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_4_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 3, 7, 0, 7, 7, 0, 0, 7, 7, 7, 7, 0, 7, 0, // Ax
+    0, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 7, 3, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-5, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_5_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 7, 7, // Ax
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-6, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0xBF are 0; 0xC0-0xFF mix 0 and 3.
+ */
+static const uint8_t pm_encoding_iso_8859_6_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-7, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0xAF are 0; 0xB0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_7_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 7, 0, 7, 7, 7, 0, 7, 0, 7, 7, // Bx
+    3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-8, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; above 0x7F only 0xB5 and 0xE0-0xFA are set (3).
+ */
+static const uint8_t pm_encoding_iso_8859_8_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-9, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_9_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-10, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_10_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 0, 7, 7, // Ax
+    0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 3, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-11, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0xA0 are 0; 0xA1-0xFF mix 0 and 3.
+ */
+static const uint8_t pm_encoding_iso_8859_11_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ax
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Bx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-13, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_13_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-14, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_14_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 3, 0, 7, 3, 7, 0, 7, 0, 7, 3, 7, 0, 0, 7, // Ax
+    7, 3, 7, 3, 7, 3, 0, 7, 3, 3, 3, 7, 3, 7, 3, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-15, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_15_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 7, 0, 3, 0, 3, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 7, 3, 0, 0, 3, 0, 3, 0, 7, 3, 7, 0, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for ISO-8859-16, indexed by the byte's value.
+ * ASCII 0-9 are 2, A-Z 7, a-z 3; 0x80-0x9F are 0; 0xA0-0xFF mix 0, 3 and 7.
+ */
+static const uint8_t pm_encoding_iso_8859_16_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 7, 3, 7, 0, 0, 7, 0, 3, 0, 7, 0, 7, 0, 3, 7, // Ax
+    0, 0, 7, 3, 7, 0, 0, 0, 3, 3, 3, 0, 7, 3, 7, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Per-byte property bitfields for KOI8-R, indexed by the byte's value. Above
+ * 0x7F: 0xA3 is 3, 0xB3 is 7, 0xC0-0xDF are 3, 0xE0-0xFF are 7, the rest 0.
+ */
+static const uint8_t pm_encoding_koi8_r_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Dx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Ex
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
+};
+
+/**
+ * Per-byte property bitfields for KOI8-U, indexed by the byte's value. Above
+ * 0x7F: 0xA0-0xBF mix 0, 3 and 7; 0xC0-0xDF are 3; 0xE0-0xFF are 7; rest 0.
+ */
+static const uint8_t pm_encoding_koi8_u_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 0, // Ax
+    0, 0, 0, 7, 7, 0, 7, 7, 0, 0, 0, 0, 0, 7, 0, 0, // Bx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Dx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Ex
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx
+};
+
+/**
+ * Character-class flags for each macCentEuro byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_cent_euro_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macCroatian byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_croatian_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macCyrillic byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_cyrillic_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macGreek byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_greek_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macIceland byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_iceland_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macRoman byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_roman_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macRomania byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_romania_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macThai byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_thai_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each TIS-620 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_tis_620_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ax
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Bx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macTurkish byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_turkish_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each macUkraine byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_mac_ukraine_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1250 byte, indexed by byte value.
+ * 0xD7 and 0xF7 are the multiplication/division signs, so they carry no flags.
+ */
+static const uint8_t pm_encoding_windows_1250_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 7, 7, 7, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 3, 3, 3, // 9x
+    0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, 0, 0, 0, 0, 7, // Ax
+    0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 3, 0, 7, 0, 3, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx (D7 is the multiplication sign)
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1251 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_windows_1251_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    7, 7, 0, 3, 0, 0, 0, 0, 0, 0, 7, 0, 7, 7, 7, 7, // 8x
+    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 3, 3, 3, // 9x
+    0, 7, 3, 7, 0, 7, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, // Ax
+    0, 0, 7, 3, 3, 3, 0, 0, 3, 0, 3, 0, 3, 7, 3, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1252 byte, indexed by byte value.
+ * 0xD7 and 0xF7 are the multiplication/division signs, so they carry no flags.
+ */
+static const uint8_t pm_encoding_windows_1252_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 7, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 7, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx (D7 is the multiplication sign)
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1253 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_windows_1253_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 7, 0, 7, 7, 7, 0, 7, 0, 7, 7, // Bx
+    3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1254 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_windows_1254_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 7, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1255 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_windows_1255_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1256 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_windows_1256_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Cx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1257 byte, indexed by byte value. The
+ * PRISM_ENCODING_TABLE predicates mask entries with PRISM_ENCODING_*_BIT.
+ */
+static const uint8_t pm_encoding_windows_1257_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 7, 0, 0, 0, 0, 7, // Ax
+    0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, // Bx
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Cx
+    7, 7, 7, 7, 7, 7, 7, 0, 7, 7, 7, 7, 7, 7, 7, 3, // Dx
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Ex
+    3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-1258 byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_windows_1258_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Character-class flags for each windows-874 byte, indexed by byte value.
+ * Nonzero entries occur only in 0x00-0x7F; every entry for 0x80-0xFF is 0.
+ */
+static const uint8_t pm_encoding_windows_874_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3x
+    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 4x
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, // 5x
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6x
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+/**
+ * Emits the table-backed alpha_char/alnum_char/isupper_char trio for `name`.
+ */
+#define PRISM_ENCODING_TABLE(name) \
+    static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n)                              \
+    { return (n <= 0) ? 0 : ((pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) != 0); }         \
+    static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n)                              \
+    { return (n <= 0) ? 0 : ((pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) != 0); }       \
+    static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n)                              \
+    { return (n > 0) && ((pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT) != 0); }
+
+PRISM_ENCODING_TABLE(cp850) // each line expands to the predicates for pm_encoding_<name>_table
+PRISM_ENCODING_TABLE(cp852)
+PRISM_ENCODING_TABLE(cp855)
+PRISM_ENCODING_TABLE(gb1988)
+PRISM_ENCODING_TABLE(ibm437)
+PRISM_ENCODING_TABLE(ibm720)
+PRISM_ENCODING_TABLE(ibm737)
+PRISM_ENCODING_TABLE(ibm775)
+PRISM_ENCODING_TABLE(ibm852)
+PRISM_ENCODING_TABLE(ibm855)
+PRISM_ENCODING_TABLE(ibm857)
+PRISM_ENCODING_TABLE(ibm860)
+PRISM_ENCODING_TABLE(ibm861)
+PRISM_ENCODING_TABLE(ibm862)
+PRISM_ENCODING_TABLE(ibm863)
+PRISM_ENCODING_TABLE(ibm864)
+PRISM_ENCODING_TABLE(ibm865)
+PRISM_ENCODING_TABLE(ibm866)
+PRISM_ENCODING_TABLE(ibm869)
+PRISM_ENCODING_TABLE(iso_8859_1)
+PRISM_ENCODING_TABLE(iso_8859_2)
+PRISM_ENCODING_TABLE(iso_8859_3)
+PRISM_ENCODING_TABLE(iso_8859_4)
+PRISM_ENCODING_TABLE(iso_8859_5)
+PRISM_ENCODING_TABLE(iso_8859_6)
+PRISM_ENCODING_TABLE(iso_8859_7)
+PRISM_ENCODING_TABLE(iso_8859_8)
+PRISM_ENCODING_TABLE(iso_8859_9)
+PRISM_ENCODING_TABLE(iso_8859_10)
+PRISM_ENCODING_TABLE(iso_8859_11)
+PRISM_ENCODING_TABLE(iso_8859_13)
+PRISM_ENCODING_TABLE(iso_8859_14)
+PRISM_ENCODING_TABLE(iso_8859_15)
+PRISM_ENCODING_TABLE(iso_8859_16)
+PRISM_ENCODING_TABLE(koi8_r)
+PRISM_ENCODING_TABLE(koi8_u)
+PRISM_ENCODING_TABLE(mac_cent_euro)
+PRISM_ENCODING_TABLE(mac_croatian)
+PRISM_ENCODING_TABLE(mac_cyrillic)
+PRISM_ENCODING_TABLE(mac_greek)
+PRISM_ENCODING_TABLE(mac_iceland)
+PRISM_ENCODING_TABLE(mac_roman)
+PRISM_ENCODING_TABLE(mac_romania)
+PRISM_ENCODING_TABLE(mac_thai)
+PRISM_ENCODING_TABLE(mac_turkish)
+PRISM_ENCODING_TABLE(mac_ukraine)
+PRISM_ENCODING_TABLE(tis_620)
+PRISM_ENCODING_TABLE(windows_1250)
+PRISM_ENCODING_TABLE(windows_1251)
+PRISM_ENCODING_TABLE(windows_1252)
+PRISM_ENCODING_TABLE(windows_1253)
+PRISM_ENCODING_TABLE(windows_1254)
+PRISM_ENCODING_TABLE(windows_1255)
+PRISM_ENCODING_TABLE(windows_1256)
+PRISM_ENCODING_TABLE(windows_1257)
+PRISM_ENCODING_TABLE(windows_1258)
+PRISM_ENCODING_TABLE(windows_874)
+
+#undef PRISM_ENCODING_TABLE // the generator is only needed for the expansions above
+#endif
+
+/**
+ * Width in bytes of the next ASCII character: 1 when a byte is available
+ * (n > 0) and its high bit is clear, otherwise 0.
+ */
+static size_t
+pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0 || (*b & 0x80) != 0) ? 0 : 1;
+}
+
+/**
+ * Returns 1 if a byte is available (n > 0) and the ASCII table flags it as
+ * alphabetic, otherwise 0. Normalized so the raw flag bits are never returned.
+ */
+static size_t
+pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    return ((n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT)) ? 1 : 0;
+}
+
+/**
+ * Alphabetic check for encodings that agree with ASCII below 0x80: a byte with
+ * the high bit set (or n <= 0) yields 0, anything else is delegated to
+ * pm_encoding_ascii_alpha_char.
+ */
+static size_t
+pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0 || *b >= 0x80) ? 0 : pm_encoding_ascii_alpha_char(b, n);
+}
+
+/**
+ * Returns 1 when a byte is available (n > 0) and the ASCII table flags it as
+ * alphanumeric, otherwise 0.
+ */
+static size_t
+pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0 || (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) == 0) ? 0 : 1;
+}
+
+/**
+ * Alphanumeric check for encodings that agree with ASCII below 0x80: a byte
+ * with the high bit set (or n <= 0) yields 0, anything else is delegated to
+ * pm_encoding_ascii_alnum_char.
+ */
+static size_t
+pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
+    return (n <= 0 || *b >= 0x80) ? 0 : pm_encoding_ascii_alnum_char(b, n);
+}
+
+/**
+ * True when a byte is available and the ASCII table flags it as uppercase.
+ */
+static bool
+pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) return false;
+    return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) != 0;
+}
+
+/**
+ * Width callback shared by encodings whose characters are always one byte
+ * long: both arguments are ignored and a width of 1 is reported every time.
+ */
+static size_t
+pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return (size_t) 1;
+}
+
+/**
+ * Returns the width in bytes (1, 2 or 3) of the EUC-JP character that starts at
+ * b, given n available bytes, or 0 if no character can be decoded from them.
+ */
+static size_t
+pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
+    // Single byte: anything below 0x80.
+    if ((n > 0) && (*b < 0x80)) {
+        return 1;
+    }
+
+    // Double byte: lead byte 0x8E or 0xA1-0xFE, trail byte 0xA1-0xFE.
+    if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
+        return 2;
+    }
+
+    // Triple byte: 0x8F, then two bytes that must each lie within 0xA1-0xFE.
+    if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[1] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
+        return 3;
+    }
+
+    return 0;
+}
+
+/**
+ * Reports whether the next EUC-JP character is uppercase. One-byte characters
+ * defer to the ASCII check; two-byte ones are matched against fixed ranges.
+ */
+static bool
+pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    switch (pm_encoding_euc_jp_char_width(b, n)) {
+        case 1:
+            return pm_encoding_ascii_isupper_char(b, n);
+        case 2:
+            switch (b[0]) {
+                case 0xA3: return b[1] >= 0xC1 && b[1] <= 0xDA;
+                case 0xA6: return b[1] >= 0xA1 && b[1] <= 0xB8;
+                case 0xA7: return b[1] >= 0xA1 && b[1] <= 0xC1;
+                default: return false;
+            }
+        default:
+            return false;
+    }
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
+ * character cannot be decoded from the given bytes.
+ *
+ * b points at the bytes to decode and n is the number of bytes available. A
+ * non-positive n yields 0 without reading from b, matching the bounds checks
+ * used by the other character width functions in this file.
+ */
+static size_t
+pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
+    // Nothing can be decoded when no bytes are available. n is signed, so a
+    // negative length is rejected here as well rather than reading b[0].
+    if (n <= 0) {
+        return 0;
+    }
+
+    // These are the single byte characters.
+    if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
+        return 1;
+    }
+
+    // These are the double byte characters.
+    if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)) {
+        return 2;
+    }
+
+    return 0;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * alphanumeric character. Multi-byte characters (and undecodable input, whose
+ * width is 0) report their width unchanged. A single byte at or above 0x80 is
+ * always treated as alphanumeric; other single bytes defer to the ASCII check.
+ */
+static size_t
+pm_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = pm_encoding_shift_jis_char_width(b, n);
+
+    if (width != 1) {
+        return width;
+    }
+
+    if (b[0] >= 0x80) {
+        return 1;
+    }
+
+    return pm_encoding_ascii_alnum_char(b, n) ? 1 : 0;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * alphabetical character. Multi-byte characters (and undecodable input, whose
+ * width is 0) report their width unchanged. A single byte at or above 0x80 is
+ * always treated as alphabetical; other single bytes defer to the ASCII check.
+ */
+static size_t
+pm_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = pm_encoding_shift_jis_char_width(b, n);
+
+    if (width != 1) {
+        return width;
+    }
+
+    if (b[0] >= 0x80) {
+        return 1;
+    }
+
+    return pm_encoding_ascii_alpha_char(b, n) ? 1 : 0;
+}
+
+/**
+ * Returns true if the next character in the Shift_JIS encoding is an uppercase
+ * character. Single byte characters defer to the ASCII check; double byte
+ * characters are uppercase only for three specific lead bytes, each with its
+ * own range of trailing bytes. Undecodable input is never uppercase.
+ */
+static bool
+pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = pm_encoding_shift_jis_char_width(b, n);
+
+    if (width == 1) {
+        return pm_encoding_ascii_isupper_char(b, n);
+    }
+
+    if (width == 2) {
+        const uint8_t lead = b[0];
+        const uint8_t trail = b[1];
+
+        if (lead == 0x82) {
+            return trail >= 0x60 && trail <= 0x79;
+        }
+        if (lead == 0x83) {
+            return trail >= 0x9F && trail <= 0xB6;
+        }
+        if (lead == 0x84) {
+            return trail >= 0x40 && trail <= 0x60;
+        }
+        return false;
+    }
+
+    // The only remaining width is 0, meaning nothing could be decoded.
+    return false;
+}
+
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+
+/**
+ * Certain encodings are equivalent to ASCII below 0x80, so this function first
+ * checks that a byte is available and that it is below 0x80, and only then
+ * falls back to the ASCII uppercase check. Any other input is not uppercase.
+ */
+static bool
+pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0 || *b >= 0x80) {
+        return false;
+    }
+
+    return pm_encoding_ascii_isupper_char(b, n);
+}
+
+/**
+ * Returns the size of the next character in the Big5 encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available.
+ */
+static size_t
+pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    // Bytes below 0x80 stand on their own.
+    if (b[0] < 0x80) {
+        return 1;
+    }
+
+    // Everything else needs a second byte to be available.
+    if (n < 2) {
+        return 0;
+    }
+
+    const uint8_t lead = b[0];
+    const uint8_t trail = b[1];
+
+    bool lead_ok = (lead >= 0xA1) && (lead <= 0xFE);
+    bool trail_ok = (trail >= 0x40 && trail <= 0x7E) || (trail >= 0xA1 && trail <= 0xFE);
+
+    return (lead_ok && trail_ok) ? 2 : 0;
+}
+
+/**
+ * Returns the size of the next character in the CP949 encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available.
+ */
+static size_t
+pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    // Bytes up to and including 0x80 stand on their own.
+    if (b[0] <= 0x80) {
+        return 1;
+    }
+
+    // Everything else needs a second byte to be available.
+    if (n < 2) {
+        return 0;
+    }
+
+    const uint8_t lead = b[0];
+    const uint8_t trail = b[1];
+
+    bool lead_ok = (lead >= 0x81) && (lead <= 0xFE);
+    bool trail_ok =
+        (trail >= 0x41 && trail <= 0x5A) ||
+        (trail >= 0x61 && trail <= 0x7A) ||
+        (trail >= 0x81 && trail <= 0xFE);
+
+    return (lead_ok && trail_ok) ? 2 : 0;
+}
+
+/**
+ * Returns the size of the next character in the Emacs MULE encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available. The lead byte selects the
+ * candidate width (1 to 4 bytes) and the trailing bytes are then validated.
+ */
+static size_t
+pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    const uint8_t lead = b[0];
+
+    // 1 byte: anything below 0x80.
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // 2 bytes: lead in 0x81-0x8F followed by a byte at or above 0xA0.
+    if (n > 1 && lead >= 0x81 && lead <= 0x8F && b[1] >= 0xA0) {
+        return 2;
+    }
+
+    // 3 bytes: two accepted lead/second-byte combinations, each followed by a
+    // third byte at or above 0xA0.
+    if (n > 2 && b[2] >= 0xA0) {
+        bool first_form = (lead >= 0x90 && lead <= 0x99) && (b[1] >= 0xA0);
+        bool second_form = (lead == 0x9A || lead == 0x9B) && (b[1] >= 0xE0 && b[1] <= 0xEF);
+
+        if (first_form || second_form) {
+            return 3;
+        }
+    }
+
+    // 4 bytes: two accepted lead/second-byte combinations, each followed by
+    // two bytes at or above 0xA0.
+    if (n > 3 && b[2] >= 0xA0 && b[3] >= 0xA0) {
+        bool first_form = (lead == 0x9C) && (b[1] >= 0xF0) && (b[1] <= 0xF4);
+        bool second_form = (lead == 0x9D) && (b[1] >= 0xF5) && (b[1] <= 0xFE);
+
+        if (first_form || second_form) {
+            return 4;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Returns the size of the next character in the EUC-KR encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available.
+ */
+static size_t
+pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    // Bytes below 0x80 stand on their own.
+    if (b[0] < 0x80) {
+        return 1;
+    }
+
+    // Everything else needs a second byte to be available.
+    if (n < 2) {
+        return 0;
+    }
+
+    // Both bytes of a double byte character fall within 0xA1-0xFE.
+    bool lead_ok = (b[0] >= 0xA1) && (b[0] <= 0xFE);
+    bool trail_ok = (b[1] >= 0xA1) && (b[1] <= 0xFE);
+
+    return (lead_ok && trail_ok) ? 2 : 0;
+}
+
+/**
+ * Returns the size of the next character in the EUC-TW encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available.
+ */
+static size_t
+pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    const uint8_t lead = b[0];
+
+    // Bytes below 0x80 stand on their own.
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // Double byte characters: both bytes fall within 0xA1-0xFE.
+    if (n > 1 && lead >= 0xA1 && lead <= 0xFE) {
+        if (b[1] >= 0xA1 && b[1] <= 0xFE) {
+            return 2;
+        }
+    }
+
+    // Quadruple byte characters: a 0x8E lead, a second byte in 0xA1-0xB0, and
+    // two further bytes within 0xA1-0xFE.
+    if (n > 3 && lead == 0x8E) {
+        bool second_ok = (b[1] >= 0xA1) && (b[1] <= 0xB0);
+        bool third_ok = (b[2] >= 0xA1) && (b[2] <= 0xFE);
+        bool fourth_ok = (b[3] >= 0xA1) && (b[3] <= 0xFE);
+
+        if (second_ok && third_ok && fourth_ok) {
+            return 4;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Returns the size of the next character in the GB18030 encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available.
+ */
+static size_t
+pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    // 1 byte: anything below 0x80.
+    if (b[0] < 0x80) {
+        return 1;
+    }
+
+    // Every multi-byte form shares the same lead byte range and needs at
+    // least one more byte.
+    if (n < 2 || b[0] < 0x81 || b[0] > 0xFE) {
+        return 0;
+    }
+
+    // 2 bytes: second byte in 0x40-0xFE, excluding 0x7F.
+    if (b[1] >= 0x40 && b[1] <= 0xFE && b[1] != 0x7F) {
+        return 2;
+    }
+
+    // 4 bytes: second and fourth bytes in 0x30-0x39, third byte in 0x81-0xFE.
+    if (n > 3) {
+        bool second_ok = (b[1] >= 0x30) && (b[1] <= 0x39);
+        bool third_ok = (b[2] >= 0x81) && (b[2] <= 0xFE);
+        bool fourth_ok = (b[3] >= 0x30) && (b[3] <= 0x39);
+
+        if (second_ok && third_ok && fourth_ok) {
+            return 4;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Returns the size of the next character in the GBK encoding, or 0 if a
+ * character cannot be decoded from the given bytes. b points at the bytes to
+ * decode and n is the number of bytes available.
+ *
+ * Double byte characters are recognized by pairing a lead byte range with one
+ * of three trailing byte ranges: 0xA1-0xFE, 0x40-0xA0 without 0x7F, or
+ * 0x40-0xFE without 0x7F.
+ */
+static size_t
+pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) {
+        return 0;
+    }
+
+    const uint8_t lead = b[0];
+
+    // Bytes up to and including 0x80 stand on their own.
+    if (lead <= 0x80) {
+        return 1;
+    }
+
+    // Everything else needs a second byte to be available.
+    if (n < 2) {
+        return 0;
+    }
+
+    const uint8_t trail = b[1];
+
+    // The three trailing byte ranges that the lead byte groups pair with.
+    bool trail_upper = (trail >= 0xA1 && trail <= 0xFE);
+    bool trail_lower = (trail >= 0x40 && trail <= 0xA0) && (trail != 0x7F);
+    bool trail_whole = (trail >= 0x40 && trail <= 0xFE) && (trail != 0x7F);
+
+    // Lead bytes paired with the 0xA1-0xFE trailing range.
+    bool lead_upper =
+        (lead >= 0xA1 && lead <= 0xA9) || // GBK/1
+        (lead >= 0xB0 && lead <= 0xF7) || // GBK/2
+        (lead >= 0xAA && lead <= 0xAF) || // user-defined 1
+        (lead >= 0xF8 && lead <= 0xFE);   // user-defined 2
+
+    // Lead bytes paired with the 0x40-0xA0 (minus 0x7F) trailing range.
+    bool lead_lower =
+        (lead >= 0xAA && lead <= 0xFE) || // GBK/4
+        (lead >= 0xA8 && lead <= 0xA9) || // GBK/5
+        (lead >= 0xA1 && lead <= 0xA7);   // user-defined 3
+
+    // Lead bytes paired with the 0x40-0xFE (minus 0x7F) trailing range.
+    bool lead_whole = (lead >= 0x81 && lead <= 0xA0); // GBK/3
+
+    if ((lead_upper && trail_upper) || (lead_lower && trail_lower) || (lead_whole && trail_whole)) {
+        return 2;
+    }
+
+    return 0;
+}
+
+#endif
+
+/**
+ * This is the table of all of the encodings that prism supports.
+ */
+const pm_encoding_t pm_encodings[] = {
+    [PM_ENCODING_UTF_8] = {
+        .name = "UTF-8",
+        .char_width = pm_encoding_utf_8_char_width,
+        .alnum_char = pm_encoding_utf_8_alnum_char,
+        .alpha_char = pm_encoding_utf_8_alpha_char,
+        .isupper_char = pm_encoding_utf_8_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_US_ASCII] = {
+        .name = "US-ASCII",
+        .char_width = pm_encoding_ascii_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char,
+        .alpha_char = pm_encoding_ascii_alpha_char,
+        .isupper_char = pm_encoding_ascii_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ASCII_8BIT] = {
+        .name = "ASCII-8BIT",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char,
+        .alpha_char = pm_encoding_ascii_alpha_char,
+        .isupper_char = pm_encoding_ascii_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_EUC_JP] = {
+        .name = "EUC-JP",
+        .char_width = pm_encoding_euc_jp_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_euc_jp_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_WINDOWS_31J] = {
+        .name = "Windows-31J",
+        .char_width = pm_encoding_shift_jis_char_width,
+        .alnum_char = pm_encoding_shift_jis_alnum_char,
+        .alpha_char = pm_encoding_shift_jis_alpha_char,
+        .isupper_char = pm_encoding_shift_jis_isupper_char,
+        .multibyte = true
+    },
+
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+    [PM_ENCODING_BIG5] = {
+        .name = "Big5",
+        .char_width = pm_encoding_big5_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_BIG5_HKSCS] = {
+        .name = "Big5-HKSCS",
+        .char_width = pm_encoding_big5_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_BIG5_UAO] = {
+        .name = "Big5-UAO",
+        .char_width = pm_encoding_big5_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_CESU_8] = {
+        .name = "CESU-8",
+        .char_width = pm_encoding_cesu_8_char_width,
+        .alnum_char = pm_encoding_cesu_8_alnum_char,
+        .alpha_char = pm_encoding_cesu_8_alpha_char,
+        .isupper_char = pm_encoding_cesu_8_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_CP51932] = {
+        .name = "CP51932",
+        .char_width = pm_encoding_euc_jp_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_euc_jp_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_CP850] = {
+        .name = "CP850",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_cp850_alnum_char,
+        .alpha_char = pm_encoding_cp850_alpha_char,
+        .isupper_char = pm_encoding_cp850_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_CP852] = {
+        .name = "CP852",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_cp852_alnum_char,
+        .alpha_char = pm_encoding_cp852_alpha_char,
+        .isupper_char = pm_encoding_cp852_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_CP855] = {
+        .name = "CP855",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_cp855_alnum_char,
+        .alpha_char = pm_encoding_cp855_alpha_char,
+        .isupper_char = pm_encoding_cp855_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_CP949] = {
+        .name = "CP949",
+        .char_width = pm_encoding_cp949_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_CP950] = {
+        .name = "CP950",
+        .char_width = pm_encoding_big5_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_CP951] = {
+        .name = "CP951",
+        .char_width = pm_encoding_big5_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_EMACS_MULE] = {
+        .name = "Emacs-Mule",
+        .char_width = pm_encoding_emacs_mule_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_EUC_JP_MS] = {
+        .name = "eucJP-ms",
+        .char_width = pm_encoding_euc_jp_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_euc_jp_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_EUC_JIS_2004] = {
+        .name = "EUC-JIS-2004",
+        .char_width = pm_encoding_euc_jp_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_euc_jp_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_EUC_KR] = {
+        .name = "EUC-KR",
+        .char_width = pm_encoding_euc_kr_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_EUC_TW] = {
+        .name = "EUC-TW",
+        .char_width = pm_encoding_euc_tw_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_GB12345] = {
+        .name = "GB12345",
+        .char_width = pm_encoding_euc_kr_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_GB18030] = {
+        .name = "GB18030",
+        .char_width = pm_encoding_gb18030_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_GB1988] = {
+        .name = "GB1988",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_gb1988_alnum_char,
+        .alpha_char = pm_encoding_gb1988_alpha_char,
+        .isupper_char = pm_encoding_gb1988_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_GB2312] = {
+        .name = "GB2312",
+        .char_width = pm_encoding_euc_kr_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_GBK] = {
+        .name = "GBK",
+        .char_width = pm_encoding_gbk_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_IBM437] = {
+        .name = "IBM437",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm437_alnum_char,
+        .alpha_char = pm_encoding_ibm437_alpha_char,
+        .isupper_char = pm_encoding_ibm437_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM720] = {
+        .name = "IBM720",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm720_alnum_char,
+        .alpha_char = pm_encoding_ibm720_alpha_char,
+        .isupper_char = pm_encoding_ibm720_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM737] = {
+        .name = "IBM737",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm737_alnum_char,
+        .alpha_char = pm_encoding_ibm737_alpha_char,
+        .isupper_char = pm_encoding_ibm737_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM775] = {
+        .name = "IBM775",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm775_alnum_char,
+        .alpha_char = pm_encoding_ibm775_alpha_char,
+        .isupper_char = pm_encoding_ibm775_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM852] = {
+        .name = "IBM852",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm852_alnum_char,
+        .alpha_char = pm_encoding_ibm852_alpha_char,
+        .isupper_char = pm_encoding_ibm852_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM855] = {
+        .name = "IBM855",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm855_alnum_char,
+        .alpha_char = pm_encoding_ibm855_alpha_char,
+        .isupper_char = pm_encoding_ibm855_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM857] = {
+        .name = "IBM857",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm857_alnum_char,
+        .alpha_char = pm_encoding_ibm857_alpha_char,
+        .isupper_char = pm_encoding_ibm857_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM860] = {
+        .name = "IBM860",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm860_alnum_char,
+        .alpha_char = pm_encoding_ibm860_alpha_char,
+        .isupper_char = pm_encoding_ibm860_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM861] = {
+        .name = "IBM861",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm861_alnum_char,
+        .alpha_char = pm_encoding_ibm861_alpha_char,
+        .isupper_char = pm_encoding_ibm861_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM862] = {
+        .name = "IBM862",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm862_alnum_char,
+        .alpha_char = pm_encoding_ibm862_alpha_char,
+        .isupper_char = pm_encoding_ibm862_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM863] = {
+        .name = "IBM863",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm863_alnum_char,
+        .alpha_char = pm_encoding_ibm863_alpha_char,
+        .isupper_char = pm_encoding_ibm863_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM864] = {
+        .name = "IBM864",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm864_alnum_char,
+        .alpha_char = pm_encoding_ibm864_alpha_char,
+        .isupper_char = pm_encoding_ibm864_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM865] = {
+        .name = "IBM865",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm865_alnum_char,
+        .alpha_char = pm_encoding_ibm865_alpha_char,
+        .isupper_char = pm_encoding_ibm865_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM866] = {
+        .name = "IBM866",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm866_alnum_char,
+        .alpha_char = pm_encoding_ibm866_alpha_char,
+        .isupper_char = pm_encoding_ibm866_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_IBM869] = {
+        .name = "IBM869",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_ibm869_alnum_char,
+        .alpha_char = pm_encoding_ibm869_alpha_char,
+        .isupper_char = pm_encoding_ibm869_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_1] = {
+        .name = "ISO-8859-1",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_1_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_1_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_1_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_2] = {
+        .name = "ISO-8859-2",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_2_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_2_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_2_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_3] = {
+        .name = "ISO-8859-3",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_3_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_3_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_3_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_4] = {
+        .name = "ISO-8859-4",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_4_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_4_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_4_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_5] = {
+        .name = "ISO-8859-5",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_5_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_5_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_5_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_6] = {
+        .name = "ISO-8859-6",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_6_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_6_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_6_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_7] = {
+        .name = "ISO-8859-7",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_7_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_7_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_7_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_8] = {
+        .name = "ISO-8859-8",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_8_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_8_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_8_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_9] = {
+        .name = "ISO-8859-9",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_9_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_9_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_9_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_10] = {
+        .name = "ISO-8859-10",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_10_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_10_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_10_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_11] = {
+        .name = "ISO-8859-11",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_11_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_11_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_11_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_13] = {
+        .name = "ISO-8859-13",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_13_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_13_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_13_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_14] = {
+        .name = "ISO-8859-14",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_14_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_14_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_14_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_15] = {
+        .name = "ISO-8859-15",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_15_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_15_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_15_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_ISO_8859_16] = {
+        .name = "ISO-8859-16",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_iso_8859_16_alnum_char,
+        .alpha_char = pm_encoding_iso_8859_16_alpha_char,
+        .isupper_char = pm_encoding_iso_8859_16_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_KOI8_R] = {
+        .name = "KOI8-R",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_koi8_r_alnum_char,
+        .alpha_char = pm_encoding_koi8_r_alpha_char,
+        .isupper_char = pm_encoding_koi8_r_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_KOI8_U] = {
+        .name = "KOI8-U",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_koi8_u_alnum_char,
+        .alpha_char = pm_encoding_koi8_u_alpha_char,
+        .isupper_char = pm_encoding_koi8_u_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_CENT_EURO] = {
+        .name = "macCentEuro",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_cent_euro_alnum_char,
+        .alpha_char = pm_encoding_mac_cent_euro_alpha_char,
+        .isupper_char = pm_encoding_mac_cent_euro_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_CROATIAN] = {
+        .name = "macCroatian",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_croatian_alnum_char,
+        .alpha_char = pm_encoding_mac_croatian_alpha_char,
+        .isupper_char = pm_encoding_mac_croatian_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_CYRILLIC] = {
+        .name = "macCyrillic",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_cyrillic_alnum_char,
+        .alpha_char = pm_encoding_mac_cyrillic_alpha_char,
+        .isupper_char = pm_encoding_mac_cyrillic_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_GREEK] = {
+        .name = "macGreek",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_greek_alnum_char,
+        .alpha_char = pm_encoding_mac_greek_alpha_char,
+        .isupper_char = pm_encoding_mac_greek_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_ICELAND] = {
+        .name = "macIceland",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_iceland_alnum_char,
+        .alpha_char = pm_encoding_mac_iceland_alpha_char,
+        .isupper_char = pm_encoding_mac_iceland_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_JAPANESE] = {
+        .name = "MacJapanese",
+        .char_width = pm_encoding_shift_jis_char_width,
+        .alnum_char = pm_encoding_shift_jis_alnum_char,
+        .alpha_char = pm_encoding_shift_jis_alpha_char,
+        .isupper_char = pm_encoding_shift_jis_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_MAC_ROMAN] = {
+        .name = "macRoman",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_roman_alnum_char,
+        .alpha_char = pm_encoding_mac_roman_alpha_char,
+        .isupper_char = pm_encoding_mac_roman_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_ROMANIA] = {
+        .name = "macRomania",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_romania_alnum_char,
+        .alpha_char = pm_encoding_mac_romania_alpha_char,
+        .isupper_char = pm_encoding_mac_romania_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_THAI] = {
+        .name = "macThai",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_thai_alnum_char,
+        .alpha_char = pm_encoding_mac_thai_alpha_char,
+        .isupper_char = pm_encoding_mac_thai_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_TURKISH] = {
+        .name = "macTurkish",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_turkish_alnum_char,
+        .alpha_char = pm_encoding_mac_turkish_alpha_char,
+        .isupper_char = pm_encoding_mac_turkish_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_MAC_UKRAINE] = {
+        .name = "macUkraine",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_mac_ukraine_alnum_char,
+        .alpha_char = pm_encoding_mac_ukraine_alpha_char,
+        .isupper_char = pm_encoding_mac_ukraine_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_SHIFT_JIS] = {
+        .name = "Shift_JIS",
+        .char_width = pm_encoding_shift_jis_char_width,
+        .alnum_char = pm_encoding_shift_jis_alnum_char,
+        .alpha_char = pm_encoding_shift_jis_alpha_char,
+        .isupper_char = pm_encoding_shift_jis_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_SJIS_DOCOMO] = {
+        .name = "SJIS-DoCoMo",
+        .char_width = pm_encoding_shift_jis_char_width,
+        .alnum_char = pm_encoding_shift_jis_alnum_char,
+        .alpha_char = pm_encoding_shift_jis_alpha_char,
+        .isupper_char = pm_encoding_shift_jis_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_SJIS_KDDI] = {
+        .name = "SJIS-KDDI",
+        .char_width = pm_encoding_shift_jis_char_width,
+        .alnum_char = pm_encoding_shift_jis_alnum_char,
+        .alpha_char = pm_encoding_shift_jis_alpha_char,
+        .isupper_char = pm_encoding_shift_jis_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_SJIS_SOFTBANK] = {
+        .name = "SJIS-SoftBank",
+        .char_width = pm_encoding_shift_jis_char_width,
+        .alnum_char = pm_encoding_shift_jis_alnum_char,
+        .alpha_char = pm_encoding_shift_jis_alpha_char,
+        .isupper_char = pm_encoding_shift_jis_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_STATELESS_ISO_2022_JP] = {
+        .name = "stateless-ISO-2022-JP",
+        .char_width = pm_encoding_emacs_mule_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_STATELESS_ISO_2022_JP_KDDI] = {
+        .name = "stateless-ISO-2022-JP-KDDI",
+        .char_width = pm_encoding_emacs_mule_char_width,
+        .alnum_char = pm_encoding_ascii_alnum_char_7bit,
+        .alpha_char = pm_encoding_ascii_alpha_char_7bit,
+        .isupper_char = pm_encoding_ascii_isupper_char_7bit,
+        .multibyte = true
+    },
+    [PM_ENCODING_TIS_620] = {
+        .name = "TIS-620",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_tis_620_alnum_char,
+        .alpha_char = pm_encoding_tis_620_alpha_char,
+        .isupper_char = pm_encoding_tis_620_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_UTF8_MAC] = {
+        .name = "UTF8-MAC",
+        .char_width = pm_encoding_utf_8_char_width,
+        .alnum_char = pm_encoding_utf_8_alnum_char,
+        .alpha_char = pm_encoding_utf_8_alpha_char,
+        .isupper_char = pm_encoding_utf_8_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_UTF8_DOCOMO] = {
+        .name = "UTF8-DoCoMo",
+        .char_width = pm_encoding_utf_8_char_width,
+        .alnum_char = pm_encoding_utf_8_alnum_char,
+        .alpha_char = pm_encoding_utf_8_alpha_char,
+        .isupper_char = pm_encoding_utf_8_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_UTF8_KDDI] = {
+        .name = "UTF8-KDDI",
+        .char_width = pm_encoding_utf_8_char_width,
+        .alnum_char = pm_encoding_utf_8_alnum_char,
+        .alpha_char = pm_encoding_utf_8_alpha_char,
+        .isupper_char = pm_encoding_utf_8_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_UTF8_SOFTBANK] = {
+        .name = "UTF8-SoftBank",
+        .char_width = pm_encoding_utf_8_char_width,
+        .alnum_char = pm_encoding_utf_8_alnum_char,
+        .alpha_char = pm_encoding_utf_8_alpha_char,
+        .isupper_char = pm_encoding_utf_8_isupper_char,
+        .multibyte = true
+    },
+    [PM_ENCODING_WINDOWS_1250] = {
+        .name = "Windows-1250",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1250_alnum_char,
+        .alpha_char = pm_encoding_windows_1250_alpha_char,
+        .isupper_char = pm_encoding_windows_1250_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1251] = {
+        .name = "Windows-1251",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1251_alnum_char,
+        .alpha_char = pm_encoding_windows_1251_alpha_char,
+        .isupper_char = pm_encoding_windows_1251_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1252] = {
+        .name = "Windows-1252",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1252_alnum_char,
+        .alpha_char = pm_encoding_windows_1252_alpha_char,
+        .isupper_char = pm_encoding_windows_1252_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1253] = {
+        .name = "Windows-1253",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1253_alnum_char,
+        .alpha_char = pm_encoding_windows_1253_alpha_char,
+        .isupper_char = pm_encoding_windows_1253_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1254] = {
+        .name = "Windows-1254",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1254_alnum_char,
+        .alpha_char = pm_encoding_windows_1254_alpha_char,
+        .isupper_char = pm_encoding_windows_1254_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1255] = {
+        .name = "Windows-1255",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1255_alnum_char,
+        .alpha_char = pm_encoding_windows_1255_alpha_char,
+        .isupper_char = pm_encoding_windows_1255_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1256] = {
+        .name = "Windows-1256",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1256_alnum_char,
+        .alpha_char = pm_encoding_windows_1256_alpha_char,
+        .isupper_char = pm_encoding_windows_1256_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1257] = {
+        .name = "Windows-1257",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1257_alnum_char,
+        .alpha_char = pm_encoding_windows_1257_alpha_char,
+        .isupper_char = pm_encoding_windows_1257_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_1258] = {
+        .name = "Windows-1258",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_1258_alnum_char,
+        .alpha_char = pm_encoding_windows_1258_alpha_char,
+        .isupper_char = pm_encoding_windows_1258_isupper_char,
+        .multibyte = false
+    },
+    [PM_ENCODING_WINDOWS_874] = {
+        .name = "Windows-874",
+        .char_width = pm_encoding_single_char_width,
+        .alnum_char = pm_encoding_windows_874_alnum_char,
+        .alpha_char = pm_encoding_windows_874_alpha_char,
+        .isupper_char = pm_encoding_windows_874_isupper_char,
+        .multibyte = false
+    }
+#endif
+};
+
+/**
+ * Find the encoding whose name spans [start, end), with end being one past the
+ * last byte. Names match via pm_strncasecmp; NULL is returned if none matches.
+ */
+const pm_encoding_t *
+pm_encoding_find(const uint8_t *start, const uint8_t *end) {
+    size_t width = (size_t) (end - start);
+
+    // First, we're going to check for UTF-8. This is the most common encoding.
+    // UTF-8 can contain extra information at the end about the platform it is
+    // encoded on, such as UTF-8-MAC or UTF-8-UNIX. We'll ignore those suffixes.
+    if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "UTF-8", 5) == 0)) {
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+        // We need to explicitly handle UTF-8-HFS, as that one needs to switch
+        // over to being UTF8-MAC.
+        if (width == 9 && (pm_strncasecmp(start + 5, (const uint8_t *) "-HFS", 4) == 0)) {
+            return &pm_encodings[PM_ENCODING_UTF8_MAC];
+        }
+#endif
+
+        // Otherwise we'll return the default UTF-8 encoding.
+        return PM_ENCODING_UTF_8_ENTRY;
+    }
+
+    // Next, we dispatch on the first byte of the name and compare it against
+    // each encoding name starting with that letter. The first match is returned.
+#define ENCODING1(name, encoding) if (width == sizeof(name) - 1 && pm_strncasecmp(start, (const uint8_t *) name, width) == 0) return &pm_encodings[encoding];
+#define ENCODING2(name1, name2, encoding) ENCODING1(name1, encoding) ENCODING1(name2, encoding)
+
+    if (width >= 3) { // Every name handled below is at least three bytes long.
+        switch (*start) {
+            case 'A': case 'a':
+                ENCODING1("ASCII", PM_ENCODING_US_ASCII);
+                ENCODING1("ASCII-8BIT", PM_ENCODING_ASCII_8BIT);
+                ENCODING1("ANSI_X3.4-1968", PM_ENCODING_US_ASCII);
+                break;
+            case 'B': case 'b':
+                ENCODING1("BINARY", PM_ENCODING_ASCII_8BIT);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("Big5", PM_ENCODING_BIG5);
+                ENCODING2("Big5-HKSCS", "Big5-HKSCS:2008", PM_ENCODING_BIG5_HKSCS);
+                ENCODING1("Big5-UAO", PM_ENCODING_BIG5_UAO);
+#endif
+                break;
+            case 'C': case 'c':
+                ENCODING1("CP65001", PM_ENCODING_UTF_8);
+                ENCODING2("CP932", "csWindows31J", PM_ENCODING_WINDOWS_31J);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("CESU-8", PM_ENCODING_CESU_8);
+                ENCODING1("CP437", PM_ENCODING_IBM437);
+                ENCODING1("CP720", PM_ENCODING_IBM720);
+                ENCODING1("CP737", PM_ENCODING_IBM737);
+                ENCODING1("CP775", PM_ENCODING_IBM775);
+                ENCODING1("CP850", PM_ENCODING_CP850);
+                ENCODING1("CP852", PM_ENCODING_CP852);
+                ENCODING1("CP855", PM_ENCODING_CP855);
+                ENCODING1("CP857", PM_ENCODING_IBM857);
+                ENCODING1("CP860", PM_ENCODING_IBM860);
+                ENCODING1("CP861", PM_ENCODING_IBM861);
+                ENCODING1("CP862", PM_ENCODING_IBM862);
+                ENCODING1("CP864", PM_ENCODING_IBM864);
+                ENCODING1("CP865", PM_ENCODING_IBM865);
+                ENCODING1("CP866", PM_ENCODING_IBM866);
+                ENCODING1("CP869", PM_ENCODING_IBM869);
+                ENCODING1("CP874", PM_ENCODING_WINDOWS_874);
+                ENCODING1("CP878", PM_ENCODING_KOI8_R);
+                ENCODING1("CP863", PM_ENCODING_IBM863);
+                ENCODING1("CP936", PM_ENCODING_GBK);
+                ENCODING1("CP949", PM_ENCODING_CP949);
+                ENCODING1("CP950", PM_ENCODING_CP950);
+                ENCODING1("CP951", PM_ENCODING_CP951);
+                ENCODING1("CP1250", PM_ENCODING_WINDOWS_1250);
+                ENCODING1("CP1251", PM_ENCODING_WINDOWS_1251);
+                ENCODING1("CP1252", PM_ENCODING_WINDOWS_1252);
+                ENCODING1("CP1253", PM_ENCODING_WINDOWS_1253);
+                ENCODING1("CP1254", PM_ENCODING_WINDOWS_1254);
+                ENCODING1("CP1255", PM_ENCODING_WINDOWS_1255);
+                ENCODING1("CP1256", PM_ENCODING_WINDOWS_1256);
+                ENCODING1("CP1257", PM_ENCODING_WINDOWS_1257);
+                ENCODING1("CP1258", PM_ENCODING_WINDOWS_1258);
+                ENCODING1("CP51932", PM_ENCODING_CP51932);
+#endif
+                break;
+            case 'E': case 'e':
+                ENCODING2("EUC-JP", "eucJP", PM_ENCODING_EUC_JP);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING2("eucJP-ms", "euc-jp-ms", PM_ENCODING_EUC_JP_MS);
+                ENCODING2("EUC-JIS-2004", "EUC-JISX0213", PM_ENCODING_EUC_JIS_2004);
+                ENCODING2("EUC-KR", "eucKR", PM_ENCODING_EUC_KR);
+                ENCODING2("EUC-CN", "eucCN", PM_ENCODING_GB2312);
+                ENCODING2("EUC-TW", "eucTW", PM_ENCODING_EUC_TW);
+                ENCODING1("Emacs-Mule", PM_ENCODING_EMACS_MULE);
+#endif
+                break;
+            case 'G': case 'g':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("GBK", PM_ENCODING_GBK);
+                ENCODING1("GB12345", PM_ENCODING_GB12345);
+                ENCODING1("GB18030", PM_ENCODING_GB18030);
+                ENCODING1("GB1988", PM_ENCODING_GB1988);
+                ENCODING1("GB2312", PM_ENCODING_GB2312);
+#endif
+                break;
+            case 'I': case 'i':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("IBM437", PM_ENCODING_IBM437);
+                ENCODING1("IBM720", PM_ENCODING_IBM720);
+                ENCODING1("IBM737", PM_ENCODING_IBM737);
+                ENCODING1("IBM775", PM_ENCODING_IBM775);
+                ENCODING1("IBM850", PM_ENCODING_CP850);
+                ENCODING1("IBM852", PM_ENCODING_IBM852);
+                ENCODING1("IBM855", PM_ENCODING_IBM855);
+                ENCODING1("IBM857", PM_ENCODING_IBM857);
+                ENCODING1("IBM860", PM_ENCODING_IBM860);
+                ENCODING1("IBM861", PM_ENCODING_IBM861);
+                ENCODING1("IBM862", PM_ENCODING_IBM862);
+                ENCODING1("IBM863", PM_ENCODING_IBM863);
+                ENCODING1("IBM864", PM_ENCODING_IBM864);
+                ENCODING1("IBM865", PM_ENCODING_IBM865);
+                ENCODING1("IBM866", PM_ENCODING_IBM866);
+                ENCODING1("IBM869", PM_ENCODING_IBM869);
+                ENCODING2("ISO-8859-1", "ISO8859-1", PM_ENCODING_ISO_8859_1);
+                ENCODING2("ISO-8859-2", "ISO8859-2", PM_ENCODING_ISO_8859_2);
+                ENCODING2("ISO-8859-3", "ISO8859-3", PM_ENCODING_ISO_8859_3);
+                ENCODING2("ISO-8859-4", "ISO8859-4", PM_ENCODING_ISO_8859_4);
+                ENCODING2("ISO-8859-5", "ISO8859-5", PM_ENCODING_ISO_8859_5);
+                ENCODING2("ISO-8859-6", "ISO8859-6", PM_ENCODING_ISO_8859_6);
+                ENCODING2("ISO-8859-7", "ISO8859-7", PM_ENCODING_ISO_8859_7);
+                ENCODING2("ISO-8859-8", "ISO8859-8", PM_ENCODING_ISO_8859_8);
+                ENCODING2("ISO-8859-9", "ISO8859-9", PM_ENCODING_ISO_8859_9);
+                ENCODING2("ISO-8859-10", "ISO8859-10", PM_ENCODING_ISO_8859_10);
+                ENCODING2("ISO-8859-11", "ISO8859-11", PM_ENCODING_ISO_8859_11);
+                ENCODING2("ISO-8859-13", "ISO8859-13", PM_ENCODING_ISO_8859_13);
+                ENCODING2("ISO-8859-14", "ISO8859-14", PM_ENCODING_ISO_8859_14);
+                ENCODING2("ISO-8859-15", "ISO8859-15", PM_ENCODING_ISO_8859_15);
+                ENCODING2("ISO-8859-16", "ISO8859-16", PM_ENCODING_ISO_8859_16);
+#endif
+                break;
+            case 'K': case 'k':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("KOI8-R", PM_ENCODING_KOI8_R);
+                ENCODING1("KOI8-U", PM_ENCODING_KOI8_U);
+#endif
+                break;
+            case 'M': case 'm':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("macCentEuro", PM_ENCODING_MAC_CENT_EURO);
+                ENCODING1("macCroatian", PM_ENCODING_MAC_CROATIAN);
+                ENCODING1("macCyrillic", PM_ENCODING_MAC_CYRILLIC);
+                ENCODING1("macGreek", PM_ENCODING_MAC_GREEK);
+                ENCODING1("macIceland", PM_ENCODING_MAC_ICELAND);
+                ENCODING1("MacJapanese", PM_ENCODING_MAC_JAPANESE);
+                ENCODING1("MacJapan", PM_ENCODING_MAC_JAPANESE);
+                ENCODING1("macRoman", PM_ENCODING_MAC_ROMAN);
+                ENCODING1("macRomania", PM_ENCODING_MAC_ROMANIA);
+                ENCODING1("macThai", PM_ENCODING_MAC_THAI);
+                ENCODING1("macTurkish", PM_ENCODING_MAC_TURKISH);
+                ENCODING1("macUkraine", PM_ENCODING_MAC_UKRAINE);
+#endif
+                break;
+            case 'P': case 'p':
+                ENCODING1("PCK", PM_ENCODING_WINDOWS_31J);
+                break;
+            case 'S': case 's':
+                ENCODING1("SJIS", PM_ENCODING_WINDOWS_31J);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("Shift_JIS", PM_ENCODING_SHIFT_JIS);
+                ENCODING1("SJIS-DoCoMo", PM_ENCODING_SJIS_DOCOMO);
+                ENCODING1("SJIS-KDDI", PM_ENCODING_SJIS_KDDI);
+                ENCODING1("SJIS-SoftBank", PM_ENCODING_SJIS_SOFTBANK);
+                ENCODING1("stateless-ISO-2022-JP", PM_ENCODING_STATELESS_ISO_2022_JP);
+                ENCODING1("stateless-ISO-2022-JP-KDDI", PM_ENCODING_STATELESS_ISO_2022_JP_KDDI);
+#endif
+                break;
+            case 'T': case 't':
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("TIS-620", PM_ENCODING_TIS_620);
+#endif
+                break;
+            case 'U': case 'u':
+                ENCODING1("US-ASCII", PM_ENCODING_US_ASCII);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING2("UTF8-MAC", "UTF-8-HFS", PM_ENCODING_UTF8_MAC); // "UTF-8-HFS" never reaches here; the UTF-8 prefix check above returns first.
+                ENCODING1("UTF8-DoCoMo", PM_ENCODING_UTF8_DOCOMO);
+                ENCODING1("UTF8-KDDI", PM_ENCODING_UTF8_KDDI);
+                ENCODING1("UTF8-SoftBank", PM_ENCODING_UTF8_SOFTBANK);
+#endif
+                break;
+            case 'W': case 'w':
+                ENCODING1("Windows-31J", PM_ENCODING_WINDOWS_31J);
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+                ENCODING1("Windows-874", PM_ENCODING_WINDOWS_874);
+                ENCODING1("Windows-1250", PM_ENCODING_WINDOWS_1250);
+                ENCODING1("Windows-1251", PM_ENCODING_WINDOWS_1251);
+                ENCODING1("Windows-1252", PM_ENCODING_WINDOWS_1252);
+                ENCODING1("Windows-1253", PM_ENCODING_WINDOWS_1253);
+                ENCODING1("Windows-1254", PM_ENCODING_WINDOWS_1254);
+                ENCODING1("Windows-1255", PM_ENCODING_WINDOWS_1255);
+                ENCODING1("Windows-1256", PM_ENCODING_WINDOWS_1256);
+                ENCODING1("Windows-1257", PM_ENCODING_WINDOWS_1257);
+                ENCODING1("Windows-1258", PM_ENCODING_WINDOWS_1258);
+#endif
+                break;
+            case '6':
+                ENCODING1("646", PM_ENCODING_US_ASCII);
+                break;
+        }
+    }
+
+#undef ENCODING2
+#undef ENCODING1
+
+    // If we didn't match any encodings, return NULL.
+    return NULL;
+}
diff --git a/prism/encoding.h b/prism/encoding.h
new file mode 100644
index 0000000000..5f7724821f
--- /dev/null
+++ b/prism/encoding.h
@@ -0,0 +1,283 @@
+/**
+ * @file encoding.h
+ *
+ * The encoding interface and implementations used by the parser.
+ */
+#ifndef PRISM_ENCODING_H
+#define PRISM_ENCODING_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_strncasecmp.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * This struct defines the functions necessary to implement the encoding
+ * interface so we can determine how many bytes the subsequent character takes.
+ * Each size_t callback should return the number of bytes, or 0 if the next
+ * bytes are invalid for the encoding and type; isupper_char returns a bool.
+ */
+typedef struct {
+    /**
+     * Return the number of bytes that the next character takes if it is valid
+     * in the encoding. Does not read more than n bytes. It is assumed that n is
+     * at least 1.
+     */
+    size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
+
+    /**
+     * Return the number of bytes that the next character takes if it is valid
+     * in the encoding and is alphabetical. Does not read more than n bytes. It
+     * is assumed that n is at least 1.
+     */
+    size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
+
+    /**
+     * Return the number of bytes that the next character takes if it is valid
+     * in the encoding and is alphanumeric. Does not read more than n bytes. It
+     * is assumed that n is at least 1.
+     */
+    size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
+
+    /**
+     * Return true if the next character is valid in the encoding and is an
+     * uppercase character. Does not read more than n bytes. It is assumed that
+     * n is at least 1.
+     */
+    bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
+
+    /**
+     * The name of the encoding. This should correspond to a value that can be
+     * passed to Encoding.find in Ruby.
+     */
+    const char *name;
+
+    /**
+     * True if the encoding is a multibyte encoding, false otherwise.
+     */
+    bool multibyte;
+} pm_encoding_t;
+
+/**
+ * All of the lookup tables use the first bit of each embedded byte to indicate
+ * whether the codepoint is alphabetical.
+ */
+#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)
+
+/**
+ * All of the lookup tables use the second bit of each embedded byte to indicate
+ * whether the codepoint is alphanumeric.
+ */
+#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)
+
+/**
+ * All of the lookup tables use the third bit of each embedded byte to indicate
+ * whether the codepoint is uppercase.
+ */
+#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
+
+/**
+ * Return the size of the next character in the UTF-8 encoding.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ *     the encoding, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphabetical character.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ *     the encoding, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphanumeric character.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ *     the encoding, or 0 if it is not.
+ */
+size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * Return true if the next character in the UTF-8 encoding is an uppercase
+ * character.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns True if the next character is valid in the encoding and is an
+ *     uppercase character, or false if it is not.
+ */
+bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * This lookup table is referenced in both the UTF-8 encoding file and the
+ * parser directly in order to speed up the default encoding processing. It is
+ * used to indicate whether a character is alphabetical, alphanumeric, or
+ * uppercase in unicode mappings.
+ */
+extern const uint8_t pm_encoding_unicode_table[256];
+
+/**
+ * The encodings that prism supports. Each value indexes the pm_encodings table.
+ */
+typedef enum {
+    PM_ENCODING_UTF_8 = 0,
+    PM_ENCODING_US_ASCII,
+    PM_ENCODING_ASCII_8BIT,
+    PM_ENCODING_EUC_JP,
+    PM_ENCODING_WINDOWS_31J,
+
+// We optionally support excluding the full set of encodings to only support the
+// minimum necessary to process Ruby code without encoding comments.
+#ifndef PRISM_ENCODING_EXCLUDE_FULL
+    PM_ENCODING_BIG5,
+    PM_ENCODING_BIG5_HKSCS,
+    PM_ENCODING_BIG5_UAO,
+    PM_ENCODING_CESU_8,
+    PM_ENCODING_CP51932,
+    PM_ENCODING_CP850,
+    PM_ENCODING_CP852,
+    PM_ENCODING_CP855,
+    PM_ENCODING_CP949,
+    PM_ENCODING_CP950,
+    PM_ENCODING_CP951,
+    PM_ENCODING_EMACS_MULE,
+    PM_ENCODING_EUC_JP_MS,
+    PM_ENCODING_EUC_JIS_2004,
+    PM_ENCODING_EUC_KR,
+    PM_ENCODING_EUC_TW,
+    PM_ENCODING_GB12345,
+    PM_ENCODING_GB18030,
+    PM_ENCODING_GB1988,
+    PM_ENCODING_GB2312,
+    PM_ENCODING_GBK,
+    PM_ENCODING_IBM437,
+    PM_ENCODING_IBM720,
+    PM_ENCODING_IBM737,
+    PM_ENCODING_IBM775,
+    PM_ENCODING_IBM852,
+    PM_ENCODING_IBM855,
+    PM_ENCODING_IBM857,
+    PM_ENCODING_IBM860,
+    PM_ENCODING_IBM861,
+    PM_ENCODING_IBM862,
+    PM_ENCODING_IBM863,
+    PM_ENCODING_IBM864,
+    PM_ENCODING_IBM865,
+    PM_ENCODING_IBM866,
+    PM_ENCODING_IBM869,
+    PM_ENCODING_ISO_8859_1,
+    PM_ENCODING_ISO_8859_2,
+    PM_ENCODING_ISO_8859_3,
+    PM_ENCODING_ISO_8859_4,
+    PM_ENCODING_ISO_8859_5,
+    PM_ENCODING_ISO_8859_6,
+    PM_ENCODING_ISO_8859_7,
+    PM_ENCODING_ISO_8859_8,
+    PM_ENCODING_ISO_8859_9,
+    PM_ENCODING_ISO_8859_10,
+    PM_ENCODING_ISO_8859_11,
+    PM_ENCODING_ISO_8859_13,
+    PM_ENCODING_ISO_8859_14,
+    PM_ENCODING_ISO_8859_15,
+    PM_ENCODING_ISO_8859_16,
+    PM_ENCODING_KOI8_R,
+    PM_ENCODING_KOI8_U,
+    PM_ENCODING_MAC_CENT_EURO,
+    PM_ENCODING_MAC_CROATIAN,
+    PM_ENCODING_MAC_CYRILLIC,
+    PM_ENCODING_MAC_GREEK,
+    PM_ENCODING_MAC_ICELAND,
+    PM_ENCODING_MAC_JAPANESE,
+    PM_ENCODING_MAC_ROMAN,
+    PM_ENCODING_MAC_ROMANIA,
+    PM_ENCODING_MAC_THAI,
+    PM_ENCODING_MAC_TURKISH,
+    PM_ENCODING_MAC_UKRAINE,
+    PM_ENCODING_SHIFT_JIS,
+    PM_ENCODING_SJIS_DOCOMO,
+    PM_ENCODING_SJIS_KDDI,
+    PM_ENCODING_SJIS_SOFTBANK,
+    PM_ENCODING_STATELESS_ISO_2022_JP,
+    PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
+    PM_ENCODING_TIS_620,
+    PM_ENCODING_UTF8_MAC,
+    PM_ENCODING_UTF8_DOCOMO,
+    PM_ENCODING_UTF8_KDDI,
+    PM_ENCODING_UTF8_SOFTBANK,
+    PM_ENCODING_WINDOWS_1250,
+    PM_ENCODING_WINDOWS_1251,
+    PM_ENCODING_WINDOWS_1252,
+    PM_ENCODING_WINDOWS_1253,
+    PM_ENCODING_WINDOWS_1254,
+    PM_ENCODING_WINDOWS_1255,
+    PM_ENCODING_WINDOWS_1256,
+    PM_ENCODING_WINDOWS_1257,
+    PM_ENCODING_WINDOWS_1258,
+    PM_ENCODING_WINDOWS_874,
+#endif
+
+    PM_ENCODING_MAXIMUM // Not an encoding: the count, used to size pm_encodings.
+} pm_encoding_type_t;
+
+/**
+ * This is the table of all of the encodings that prism supports.
+ */
+extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
+
+/**
+ * This is the default UTF-8 encoding. We need a reference to it to quickly
+ * create parsers.
+ */
+#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
+
+/**
+ * This is the US-ASCII encoding. We need a reference to it to be able to
+ * compare against it when a string is being created because it could possibly
+ * need to fall back to ASCII-8BIT.
+ */
+#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
+
+/**
+ * This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
+ * can compare against it because invalid multibyte characters are not a thing
+ * in this encoding. It is also needed for handling Regexp encoding flags.
+ */
+#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
+
+/**
+ * This is the EUC-JP encoding. We need a reference to it to quickly process
+ * regular expression modifiers.
+ */
+#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
+
+/**
+ * This is the Windows-31J encoding. We need a reference to it to quickly
+ * process regular expression modifiers.
+ */
+#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
+
+/**
+ * Parse the given name of an encoding and return a pointer to the corresponding
+ * encoding struct if one can be found, otherwise return NULL.
+ *
+ * @param start A pointer to the first byte of the name.
+ * @param end A pointer to one past the last byte of the name.
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
+ */
+const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
+
+#endif
diff --git a/prism/extension.c b/prism/extension.c
new file mode 100644
index 0000000000..71c2d91b98
--- /dev/null
+++ b/prism/extension.c
@@ -0,0 +1,1427 @@
+#include "prism/extension.h"
+
+#ifdef _WIN32
+#include <ruby/win32.h>
+#endif
+
+// NOTE: this file should contain only bindings. All non-trivial logic should be
+// in libprism so it can be shared among its various callers.
+
+// Handles to the Prism constants used by the bindings in this file. They are
+// only declared here. NOTE(review): presumably assigned during extension
+// initialization, which is outside this view; confirm.
+VALUE rb_cPrism;
+VALUE rb_cPrismNode;
+VALUE rb_cPrismSource;
+VALUE rb_cPrismToken;
+VALUE rb_cPrismLocation;
+
+// Classes instantiated when converting parser output (comments, diagnostics,
+// results) into Ruby objects, plus the classes consulted while reading options.
+VALUE rb_cPrismComment;
+VALUE rb_cPrismInlineComment;
+VALUE rb_cPrismEmbDocComment;
+VALUE rb_cPrismMagicComment;
+VALUE rb_cPrismParseError;
+VALUE rb_cPrismParseWarning;
+VALUE rb_cPrismResult;
+VALUE rb_cPrismParseResult;
+VALUE rb_cPrismLexResult;
+VALUE rb_cPrismParseLexResult;
+VALUE rb_cPrismStringQuery;
+VALUE rb_cPrismScope;
+VALUE rb_cPrismCurrentVersionError;
+
+VALUE rb_cPrismDebugEncoding;
+
+// IDs that build_options_i compares against the keys of the keyword hash.
+ID rb_id_option_command_line;
+ID rb_id_option_encoding;
+ID rb_id_option_filepath;
+ID rb_id_option_freeze;
+ID rb_id_option_frozen_string_literal;
+ID rb_id_option_line;
+ID rb_id_option_main_script;
+ID rb_id_option_partial_script;
+ID rb_id_option_scopes;
+ID rb_id_option_version;
+// Method ID invoked on rb_cPrismSource by parse_lex_input to build a Source.
+ID rb_id_source_for;
+// IDs that build_options_scopes compares against the symbols found in a
+// Prism::Scope's @forwarding array.
+ID rb_id_forwarding_positionals;
+ID rb_id_forwarding_keywords;
+ID rb_id_forwarding_block;
+ID rb_id_forwarding_all;
+
+/******************************************************************************/
+/* IO of Ruby code                                                            */
+/******************************************************************************/
+
+/**
+ * Return the C string backing the given VALUE. A TypeError is raised when the
+ * VALUE is anything other than a String.
+ */
+static const char *
+check_string(VALUE value) {
+    const bool is_string = RB_TYPE_P(value, T_STRING);
+
+    // Anything that is not a String is rejected outright.
+    if (!is_string) {
+        rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(value));
+    }
+
+    const char *pointer = RSTRING_PTR(value);
+    return pointer;
+}
+
+/**
+ * Point the given pm_string_t at the bytes and length of the given Ruby string.
+ * A TypeError is raised when the VALUE is not a String.
+ */
+static void
+input_load_string(pm_string_t *input, VALUE string) {
+    // Happy path first: a real String is wrapped without copying.
+    if (RB_TYPE_P(string, T_STRING)) {
+        pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
+        return;
+    }
+
+    rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
+}
+
+/******************************************************************************/
+/* Building C options from Ruby options                                       */
+/******************************************************************************/
+
+/**
+ * Convert the forwarding list of a Prism::Scope into the corresponding
+ * PM_OPTIONS_SCOPE_FORWARDING_* bit flags. Raises a TypeError if the list is
+ * not an Array of Symbols and an ArgumentError for an unrecognized symbol.
+ */
+static uint8_t
+build_options_scope_forwarding(VALUE names) {
+    if (!RB_TYPE_P(names, T_ARRAY)) {
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(names));
+    }
+
+    uint8_t flags = PM_OPTIONS_SCOPE_FORWARDING_NONE;
+    const size_t total = RARRAY_LEN(names);
+
+    for (size_t position = 0; position < total; position++) {
+        VALUE name = rb_ary_entry(names, position);
+
+        // Every entry has to be a symbol before it can be mapped to a flag.
+        if (!RB_TYPE_P(name, T_SYMBOL)) {
+            rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(name));
+        }
+
+        const ID id = SYM2ID(name);
+        if (id == rb_id_forwarding_positionals) {
+            flags |= PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS;
+        } else if (id == rb_id_forwarding_keywords) {
+            flags |= PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS;
+        } else if (id == rb_id_forwarding_block) {
+            flags |= PM_OPTIONS_SCOPE_FORWARDING_BLOCK;
+        } else if (id == rb_id_forwarding_all) {
+            flags |= PM_OPTIONS_SCOPE_FORWARDING_ALL;
+        } else {
+            rb_raise(rb_eArgError, "invalid forwarding value: %" PRIsVALUE, name);
+        }
+    }
+
+    return flags;
+}
+
+/**
+ * Populate the scopes of the given options from the Ruby value passed for the
+ * scopes keyword. Each element may be an Array of Symbols or a Prism::Scope.
+ */
+static void
+build_options_scopes(pm_options_t *options, VALUE scopes) {
+    // Only arrays are accepted for the scopes keyword.
+    if (!RB_TYPE_P(scopes, T_ARRAY)) {
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
+    }
+
+    // Reserve room for one entry per scope.
+    const size_t total = RARRAY_LEN(scopes);
+    if (!pm_options_scopes_init(options, total)) {
+        rb_raise(rb_eNoMemError, "failed to allocate memory");
+    }
+
+    for (size_t position = 0; position < total; position++) {
+        VALUE entry = rb_ary_entry(scopes, position);
+
+        // Work out where the local names come from and which forwarding
+        // flags apply. Plain arrays carry no forwarding information.
+        VALUE symbols;
+        uint8_t forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE;
+
+        if (RB_TYPE_P(entry, T_ARRAY)) {
+            symbols = entry;
+        } else if (rb_obj_is_kind_of(entry, rb_cPrismScope)) {
+            symbols = rb_ivar_get(entry, rb_intern("@locals"));
+            if (!RB_TYPE_P(symbols, T_ARRAY)) {
+                rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(symbols));
+            }
+
+            forwarding = build_options_scope_forwarding(rb_ivar_get(entry, rb_intern("@forwarding")));
+        } else {
+            rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array or Prism::Scope)", rb_obj_class(entry));
+        }
+
+        // Reserve room for the locals of this scope.
+        pm_options_scope_t *target = &options->scopes[position];
+        const size_t symbol_count = RARRAY_LEN(symbols);
+        if (!pm_options_scope_init(target, symbol_count)) {
+            rb_raise(rb_eNoMemError, "failed to allocate memory");
+        }
+
+        // Copy each local name into the scope.
+        for (size_t symbol_index = 0; symbol_index < symbol_count; symbol_index++) {
+            VALUE symbol = rb_ary_entry(symbols, symbol_index);
+
+            // Locals must be symbols.
+            if (!RB_TYPE_P(symbol, T_SYMBOL)) {
+                rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(symbol));
+            }
+
+            const char *text = rb_id2name(SYM2ID(symbol));
+            pm_string_constant_init(&target->locals[symbol_index], text, strlen(text));
+        }
+
+        // Finally record the forwarding flags on the scope.
+        pm_options_scope_forwarding_set(target, forwarding);
+    }
+}
+
+/**
+ * An iterator function that is called for each key-value in the keywords hash.
+ *
+ * The key is matched against the known option IDs and the value, unless nil,
+ * is applied to the pm_options_t passed through `argument`. Raises an
+ * ArgumentError for an unknown keyword, an unsupported version, or an invalid
+ * command line flag, and a TypeError (through check_string) when a string
+ * option is not a String. Always returns ST_CONTINUE so that every pair is
+ * visited.
+ */
+static int
+build_options_i(VALUE key, VALUE value, VALUE argument) {
+    pm_options_t *options = (pm_options_t *) argument;
+    ID key_id = SYM2ID(key);
+
+    if (key_id == rb_id_option_filepath) {
+        if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
+    } else if (key_id == rb_id_option_encoding) {
+        if (!NIL_P(value)) {
+            // `false` locks the encoding instead of naming one.
+            if (value == Qfalse) {
+                pm_options_encoding_locked_set(options, true);
+            } else {
+                pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
+            }
+        }
+    } else if (key_id == rb_id_option_line) {
+        if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
+    } else if (key_id == rb_id_option_frozen_string_literal) {
+        if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
+    } else if (key_id == rb_id_option_version) {
+        if (!NIL_P(value)) {
+            const char *version = check_string(value);
+
+            if (RSTRING_LEN(value) == 7 && strncmp(version, "current", 7) == 0) {
+                // "current" resolves to the running RUBY_VERSION; only its
+                // first three characters are handed to the version parser.
+                const char *current_version = RSTRING_PTR(rb_const_get(rb_cObject, rb_intern("RUBY_VERSION")));
+                if (!pm_options_version_set(options, current_version, 3)) {
+                    rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, current_version));
+                }
+            } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
+                rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
+            }
+        }
+    } else if (key_id == rb_id_option_scopes) {
+        if (!NIL_P(value)) build_options_scopes(options, value);
+    } else if (key_id == rb_id_option_command_line) {
+        if (!NIL_P(value)) {
+            const char *string = check_string(value);
+            uint8_t command_line = 0;
+
+            // Measure the flag string once rather than on every iteration.
+            // strlen is kept (instead of RSTRING_LEN) so that scanning still
+            // stops at the first NUL byte, as it always has.
+            const size_t length = strlen(string);
+
+            for (size_t index = 0; index < length; index++) {
+                switch (string[index]) {
+                    case 'a': command_line |= PM_OPTIONS_COMMAND_LINE_A; break;
+                    case 'e': command_line |= PM_OPTIONS_COMMAND_LINE_E; break;
+                    case 'l': command_line |= PM_OPTIONS_COMMAND_LINE_L; break;
+                    case 'n': command_line |= PM_OPTIONS_COMMAND_LINE_N; break;
+                    case 'p': command_line |= PM_OPTIONS_COMMAND_LINE_P; break;
+                    case 'x': command_line |= PM_OPTIONS_COMMAND_LINE_X; break;
+                    default: rb_raise(rb_eArgError, "invalid command line flag: '%c'", string[index]); break;
+                }
+            }
+
+            pm_options_command_line_set(options, command_line);
+        }
+    } else if (key_id == rb_id_option_main_script) {
+        if (!NIL_P(value)) pm_options_main_script_set(options, RTEST(value));
+    } else if (key_id == rb_id_option_partial_script) {
+        if (!NIL_P(value)) pm_options_partial_script_set(options, RTEST(value));
+    } else if (key_id == rb_id_option_freeze) {
+        if (!NIL_P(value)) pm_options_freeze_set(options, RTEST(value));
+    } else {
+        rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key);
+    }
+
+    return ST_CONTINUE;
+}
+
+/**
+ * We need a struct here to pass through rb_protect and it has to be a single
+ * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this
+ * through as an opaque pointer and cast it on both sides.
+ */
+struct build_options_data {
+    // The options being populated; forwarded to build_options_i as the
+    // rb_hash_foreach argument.
+    pm_options_t *options;
+    // The keyword hash that build_options iterates over.
+    VALUE keywords;
+};
+
+/**
+ * Apply every entry of the keywords hash to the options. The argument is a
+ * pointer to a struct build_options_data disguised as a VALUE so that this
+ * function can be run under rb_protect. A Ruby error may be raised when an
+ * option is invalid.
+ */
+static VALUE
+build_options(VALUE argument) {
+    const struct build_options_data *payload = (const struct build_options_data *) argument;
+    pm_options_t *options = payload->options;
+
+    rb_hash_foreach(payload->keywords, build_options_i, (VALUE) options);
+    return Qnil;
+}
+
+/**
+ * Fill in the options from the given keyword arguments and, when present, the
+ * positional filepath. If anything raises along the way the options are freed
+ * before the error is propagated.
+ */
+static void
+extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
+    // Lines are 1-indexed unless a keyword says otherwise.
+    options->line = 1;
+
+    if (!NIL_P(keywords)) {
+        struct build_options_data data = { .options = options, .keywords = keywords };
+
+        // Run the hash iteration protected so that we get the chance to
+        // release the options before re-raising.
+        int state = 0;
+        rb_protect(build_options, (VALUE) &data, &state);
+
+        if (state != 0) {
+            pm_options_free(options);
+            rb_jump_tag(state);
+        }
+    }
+
+    // Without a positional filepath there is nothing more to do.
+    if (NIL_P(filepath)) {
+        return;
+    }
+
+    if (!RB_TYPE_P(filepath, T_STRING)) {
+        pm_options_free(options);
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
+    }
+
+    pm_options_filepath_set(options, RSTRING_PTR(filepath));
+}
+
+/**
+ * Read options for methods that look like (source, **options).
+ *
+ * Populates `options` from the keyword arguments and points `input` at the
+ * bytes of the source string. Raises a TypeError if the source is not a
+ * String. Errors raised while extracting the options are propagated by
+ * extract_options after it has freed `options`.
+ */
+static void
+string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
+    VALUE string;
+    VALUE keywords;
+    rb_scan_args(argc, argv, "1:", &string, &keywords);
+
+    // Validate the source before extracting any options. extract_options can
+    // allocate (for example for the scopes option), and raising afterwards
+    // would unwind past the caller without those allocations being freed.
+    if (!RB_TYPE_P(string, T_STRING)) {
+        rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
+    }
+
+    extract_options(options, Qnil, keywords);
+    input_load_string(input, string);
+}
+
+/**
+ * Read options for methods that look like (filepath, **options).
+ *
+ * The filepath must be a String. It is converted with rb_str_encode_ospath and
+ * the converted string is stored in `*encoded_filepath` so that the caller
+ * holds a reference to it. The options are then extracted and the file
+ * contents are loaded into `input`. If the file cannot be loaded the options
+ * are freed and a system call error (or a RuntimeError for an unrecognized
+ * failure code) is raised.
+ */
+static void
+file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
+    VALUE filepath;
+    VALUE keywords;
+    rb_scan_args(argc, argv, "1:", &filepath, &keywords);
+
+    Check_Type(filepath, T_STRING);
+    *encoded_filepath = rb_str_encode_ospath(filepath);
+    extract_options(options, *encoded_filepath, keywords);
+
+    // NOTE(review): `source` is still read after pm_options_free below. This
+    // assumes the filepath bytes are borrowed from *encoded_filepath rather
+    // than owned by the options; confirm against pm_options_filepath_set.
+    const char *source = (const char *) pm_string_source(&options->filepath);
+    pm_string_init_result_t result;
+
+    switch (result = pm_string_file_init(input, source)) {
+        case PM_STRING_INIT_SUCCESS:
+            break;
+        case PM_STRING_INIT_ERROR_GENERIC: {
+            // Capture the system error code before doing any cleanup, since
+            // releasing memory is allowed to overwrite it.
+#ifdef _WIN32
+            int e = rb_w32_map_errno(GetLastError());
+#else
+            int e = errno;
+#endif
+
+            pm_options_free(options);
+            rb_syserr_fail(e, source);
+            break;
+        }
+        case PM_STRING_INIT_ERROR_DIRECTORY:
+            pm_options_free(options);
+            rb_syserr_fail(EISDIR, source);
+            break;
+        default:
+            pm_options_free(options);
+            rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
+            break;
+    }
+}
+
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+/******************************************************************************/
+/* Serializing the AST                                                        */
+/******************************************************************************/
+
+/**
+ * Parse the given input with the given options, serialize the resulting AST,
+ * and return the serialized bytes as a Ruby string. Raises NoMemoryError if
+ * the serialization buffer cannot be initialized.
+ */
+static VALUE
+dump_input(pm_string_t *input, const pm_options_t *options) {
+    pm_buffer_t serialized;
+    if (!pm_buffer_init(&serialized)) {
+        rb_raise(rb_eNoMemError, "failed to allocate memory");
+    }
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+
+    pm_node_t *root = pm_parse(&parser);
+    pm_serialize(&parser, root, &serialized);
+
+    // Copy the serialized bytes into a Ruby string before tearing anything
+    // down.
+    VALUE dumped = rb_str_new(pm_buffer_value(&serialized), pm_buffer_length(&serialized));
+
+    pm_node_destroy(&parser, root);
+    pm_buffer_free(&serialized);
+    pm_parser_free(&parser);
+
+    return dumped;
+}
+
+/**
+ * call-seq:
+ *   Prism::dump(source, **options) -> String
+ *
+ * Dump the AST corresponding to the given string to a string. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+dump(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+#ifdef PRISM_BUILD_DEBUG
+    // In debug builds the source is parsed from a separately allocated copy
+    // that is exactly as long as the source.
+    size_t length = pm_string_length(&input);
+    char *copy = xmalloc(length);
+    memcpy(copy, pm_string_source(&input), length);
+    pm_string_constant_init(&input, copy, length);
+#endif
+
+    VALUE dumped = dump_input(&input, &options);
+    if (options.freeze) {
+        rb_obj_freeze(dumped);
+    }
+
+#ifdef PRISM_BUILD_DEBUG
+    xfree(copy);
+#endif
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    return dumped;
+}
+
+/**
+ * call-seq:
+ *   Prism::dump_file(filepath, **options) -> String
+ *
+ * Dump the AST corresponding to the given file to a string. For supported
+ * options, see Prism::parse. As with Prism::dump, the returned string is
+ * frozen when the `freeze` option is set.
+ */
+static VALUE
+dump_file(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+
+    VALUE value = dump_input(&input, &options);
+
+    // Honor the freeze option in the same way that dump does.
+    if (options.freeze) rb_obj_freeze(value);
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    // The options' filepath was set from the bytes of encoded_filepath. Keep
+    // the Ruby string reachable until parsing has finished, in case those
+    // bytes are borrowed rather than copied.
+    RB_GC_GUARD(encoded_filepath);
+
+    return value;
+}
+
+#endif
+
+/******************************************************************************/
+/* Extracting values for the parse result                                     */
+/******************************************************************************/
+
+/**
+ * Instantiate the given class with the given arguments, exactly like
+ * rb_class_new_instance, and freeze the new object when `freeze` is true.
+ */
+static inline VALUE
+rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool freeze) {
+    VALUE instance = rb_class_new_instance(argc, argv, klass);
+
+    if (!freeze) {
+        return instance;
+    }
+
+    rb_obj_freeze(instance);
+    return instance;
+}
+
+/**
+ * Build a Location instance for the span that begins at `start` and covers
+ * `length` bytes. The start is stored as an offset from the beginning of the
+ * parser's source.
+ */
+static inline VALUE
+parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
+    VALUE arguments[3];
+    arguments[0] = source;
+    arguments[1] = LONG2FIX(start - parser->start);
+    arguments[2] = LONG2FIX(length);
+
+    return rb_class_new_instance_freeze(3, arguments, rb_cPrismLocation, freeze);
+}
+
+/**
+ * Create a new Location instance from the given parser and location. `loc` is
+ * a struct with `start` and `end` pointers. It is evaluated more than once, so
+ * it must not have side effects. It is parenthesized in the expansion so that
+ * any expression yielding such a struct can be passed safely.
+ */
+#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
+    parser_location((parser), (source), (freeze), (loc).start, (size_t) ((loc).end - (loc).start))
+
+/**
+ * Wrap a single parser comment in the matching Ruby class: EmbDocComment for
+ * embdoc comments and InlineComment for everything else.
+ */
+static inline VALUE
+parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
+    VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, comment->location);
+
+    VALUE klass;
+    if (comment->type == PM_COMMENT_EMBDOC) {
+        klass = rb_cPrismEmbDocComment;
+    } else {
+        klass = rb_cPrismInlineComment;
+    }
+
+    return rb_class_new_instance_freeze(1, &location, klass, freeze);
+}
+
+/**
+ * Walk the parser's comment list and collect a Ruby comment object for each
+ * entry into an array. The array is frozen when `freeze` is true.
+ */
+static VALUE
+parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
+    VALUE result = rb_ary_new_capa(parser->comment_list.size);
+    const pm_comment_t *current = (const pm_comment_t *) parser->comment_list.head;
+
+    while (current != NULL) {
+        rb_ary_push(result, parser_comment(parser, source, freeze, current));
+        current = (const pm_comment_t *) current->node.next;
+    }
+
+    if (freeze) {
+        rb_obj_freeze(result);
+    }
+
+    return result;
+}
+
+/**
+ * Wrap a single parser magic comment in a MagicComment instance built from the
+ * locations of its key and of its value.
+ */
+static inline VALUE
+parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
+    VALUE locations[2];
+
+    // The key location is created first, followed by the value location.
+    locations[0] = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
+    locations[1] = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
+
+    return rb_class_new_instance_freeze(2, locations, rb_cPrismMagicComment, freeze);
+}
+
+/**
+ * Walk the parser's magic comment list and collect a MagicComment for each
+ * entry into an array. The array is frozen when `freeze` is true.
+ */
+static VALUE
+parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
+    VALUE result = rb_ary_new_capa(parser->magic_comment_list.size);
+    const pm_magic_comment_t *current = (const pm_magic_comment_t *) parser->magic_comment_list.head;
+
+    while (current != NULL) {
+        rb_ary_push(result, parser_magic_comment(parser, source, freeze, current));
+        current = (const pm_magic_comment_t *) current->node.next;
+    }
+
+    if (freeze) {
+        rb_obj_freeze(result);
+    }
+
+    return result;
+}
+
+/**
+ * Return a Location for the parser's data location, or nil when the parser
+ * did not record one (its end pointer is NULL).
+ */
+static VALUE
+parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
+    if (parser->data_loc.end != NULL) {
+        return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
+    }
+
+    return Qnil;
+}
+
+/**
+ * Walk the parser's error list and collect a ParseError for each diagnostic
+ * into an array. Each error carries its type, a frozen message, its location,
+ * and its level. Raises a RuntimeError on an unrecognized level.
+ */
+static VALUE
+parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
+    VALUE result = rb_ary_new_capa(parser->error_list.size);
+    const pm_diagnostic_t *current = (const pm_diagnostic_t *) parser->error_list.head;
+
+    while (current != NULL) {
+        VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(current->diag_id)));
+        VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(current->message, encoding));
+        VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, current->location);
+
+        // Translate the C level into the name of the Ruby symbol.
+        const char *level_name = NULL;
+        switch (current->level) {
+            case PM_ERROR_LEVEL_SYNTAX:
+                level_name = "syntax";
+                break;
+            case PM_ERROR_LEVEL_ARGUMENT:
+                level_name = "argument";
+                break;
+            case PM_ERROR_LEVEL_LOAD:
+                level_name = "load";
+                break;
+            default:
+                rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, current->level);
+        }
+
+        VALUE fields[] = { type, message, location, ID2SYM(rb_intern(level_name)) };
+        rb_ary_push(result, rb_class_new_instance_freeze(4, fields, rb_cPrismParseError, freeze));
+
+        current = (const pm_diagnostic_t *) current->node.next;
+    }
+
+    if (freeze) {
+        rb_obj_freeze(result);
+    }
+
+    return result;
+}
+
+/**
+ * Walk the parser's warning list and collect a ParseWarning for each
+ * diagnostic into an array. Each warning carries its type, a frozen message,
+ * its location, and its level. Raises a RuntimeError on an unrecognized level.
+ */
+static VALUE
+parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
+    VALUE result = rb_ary_new_capa(parser->warning_list.size);
+    const pm_diagnostic_t *current = (const pm_diagnostic_t *) parser->warning_list.head;
+
+    while (current != NULL) {
+        VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(current->diag_id)));
+        VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(current->message, encoding));
+        VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, current->location);
+
+        // Translate the C level into the name of the Ruby symbol.
+        const char *level_name = NULL;
+        switch (current->level) {
+            case PM_WARNING_LEVEL_DEFAULT:
+                level_name = "default";
+                break;
+            case PM_WARNING_LEVEL_VERBOSE:
+                level_name = "verbose";
+                break;
+            default:
+                rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, current->level);
+        }
+
+        VALUE fields[] = { type, message, location, ID2SYM(rb_intern(level_name)) };
+        rb_ary_push(result, rb_class_new_instance_freeze(4, fields, rb_cPrismParseWarning, freeze));
+
+        current = (const pm_diagnostic_t *) current->node.next;
+    }
+
+    if (freeze) {
+        rb_obj_freeze(result);
+    }
+
+    return result;
+}
+
+/**
+ * Instantiate the given result class with the value, the comments, the magic
+ * comments, the data location, the errors, the warnings, and the source, in
+ * that order.
+ */
+static VALUE
+parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source, bool freeze) {
+    VALUE comments = parser_comments(parser, source, freeze);
+    VALUE magic_comments = parser_magic_comments(parser, source, freeze);
+    VALUE data_loc = parser_data_loc(parser, source, freeze);
+    VALUE errors = parser_errors(parser, encoding, source, freeze);
+    VALUE warnings = parser_warnings(parser, encoding, source, freeze);
+
+    VALUE arguments[7] = { value, comments, magic_comments, data_loc, errors, warnings, source };
+    return rb_class_new_instance_freeze(7, arguments, class, freeze);
+}
+
+/******************************************************************************/
+/* Lexing Ruby code                                                           */
+/******************************************************************************/
+
+/**
+ * This struct gets stored in the parser and passed in to the lex callback any
+ * time a new token is found. We use it to store the necessary information to
+ * initialize a Token instance.
+ */
+typedef struct {
+    // The Source object handed to every token that gets created.
+    VALUE source;
+    // The array that collects one [token, lex state] pair per lexed token.
+    VALUE tokens;
+    // The encoding used when creating tokens. It is replaced by
+    // parse_lex_encoding_changed_callback when the parser's encoding changes.
+    rb_encoding *encoding;
+    // Whether the created tokens and pairs should be frozen.
+    bool freeze;
+} parse_lex_data_t;
+
+/**
+ * The lex callback handed to the parser. For every token that is found, a
+ * Token instance is created, paired with the parser's current lex state, and
+ * appended to the tokens array held in the callback data.
+ */
+static void
+parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
+    parse_lex_data_t *state = (parse_lex_data_t *) parser->lex_callback->data;
+    const bool freeze = state->freeze;
+
+    VALUE token_value = pm_token_new(parser, token, state->encoding, state->source, freeze);
+    VALUE pair = rb_assoc_new(token_value, INT2FIX(parser->lex_state));
+
+    if (freeze) {
+        rb_obj_freeze(token_value);
+        rb_obj_freeze(pair);
+    }
+
+    rb_ary_push(state->tokens, pair);
+}
+
+/**
+ * Invoked by the parser whenever its encoding changes because of a magic
+ * comment. The encoding used for new tokens is updated, and every token that
+ * was lexed so far is rebuilt with its value re-associated to the new encoding.
+ */
+static void
+parse_lex_encoding_changed_callback(pm_parser_t *parser) {
+    parse_lex_data_t *state = (parse_lex_data_t *) parser->lex_callback->data;
+    state->encoding = rb_enc_find(parser->encoding->name);
+
+    // Only a handful of tokens can exist at this point because the encoding
+    // can only change at the top of the file, so rebuilding them is cheap.
+    VALUE existing = state->tokens;
+    VALUE rebuilt = rb_ary_new();
+
+    long index = 0;
+    while (index < RARRAY_LEN(existing)) {
+        VALUE pair = rb_ary_entry(existing, index);
+        VALUE old_token = rb_ary_entry(pair, 0);
+
+        // Duplicate the token's value and tag the copy with the new encoding.
+        VALUE old_value = rb_ivar_get(old_token, rb_intern("@value"));
+        VALUE reencoded = rb_str_dup(old_value);
+
+        rb_enc_associate(reencoded, state->encoding);
+        if (state->freeze) {
+            rb_obj_freeze(reencoded);
+        }
+
+        VALUE type = rb_ivar_get(old_token, rb_intern("@type"));
+        VALUE location = rb_ivar_get(old_token, rb_intern("@location"));
+        VALUE arguments[] = { state->source, type, reencoded, location };
+
+        VALUE new_token = rb_class_new_instance(4, arguments, rb_cPrismToken);
+        VALUE new_pair = rb_assoc_new(new_token, rb_ary_entry(pair, 1));
+
+        if (state->freeze) {
+            rb_obj_freeze(new_token);
+            rb_obj_freeze(new_pair);
+        }
+
+        rb_ary_push(rebuilt, new_pair);
+        index++;
+    }
+
+    rb_ary_replace(state->tokens, rebuilt);
+}
+
+/**
+ * Parse the given input and return a ParseResult containing just the tokens or
+ * the nodes and tokens.
+ *
+ * When return_nodes is true the result is a ParseLexResult whose value is a
+ * two-element array of [AST, tokens]; otherwise it is a LexResult whose value
+ * is the tokens array.
+ */
+static VALUE
+parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+    // Get notified when the encoding changes so that the tokens lexed so far
+    // can be rebuilt with the new encoding.
+    pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
+
+    // The Source object is created before parsing because every token needs a
+    // reference to it. Its string and offsets are completed after pm_parse().
+    VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
+    VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
+    VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
+
+    // Tokens start out being created as UTF-8; the encoding changed callback
+    // replaces this encoding when it fires.
+    parse_lex_data_t parse_lex_data = {
+        .source = source,
+        .tokens = rb_ary_new(),
+        .encoding = rb_utf8_encoding(),
+        .freeze = options->freeze,
+    };
+
+    parse_lex_data_t *data = &parse_lex_data;
+    pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
+        .data = (void *) data,
+        .callback = parse_lex_token,
+    };
+
+    parser.lex_callback = &lex_callback;
+    pm_node_t *node = pm_parse(&parser);
+
+    // Here we need to update the Source object to have the correct
+    // encoding for the source string and the correct newline offsets.
+    // We do it here because we've already created the Source object and given
+    // it over to all of the tokens, and both of these are only set after pm_parse().
+    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+    rb_enc_associate(source_string, encoding);
+
+    for (size_t index = 0; index < parser.newline_list.size; index++) {
+        rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
+    }
+
+    // Freezing happens only now because the string and the offsets were still
+    // being mutated above.
+    if (options->freeze) {
+        rb_obj_freeze(source_string);
+        rb_obj_freeze(offsets);
+        rb_obj_freeze(source);
+        rb_obj_freeze(parse_lex_data.tokens);
+    }
+
+    VALUE result;
+    if (return_nodes) {
+        // Pair the AST with the tokens.
+        VALUE value = rb_ary_new_capa(2);
+        rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze));
+        rb_ary_push(value, parse_lex_data.tokens);
+        if (options->freeze) rb_obj_freeze(value);
+        result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze);
+    } else {
+        result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze);
+    }
+
+    // The Ruby objects have been built, so the C-side tree and parser can be
+    // released.
+    pm_node_destroy(&parser, node);
+    pm_parser_free(&parser);
+
+    return result;
+}
+
+/**
+ * call-seq:
+ *   Prism::lex(source, **options) -> LexResult
+ *
+ * Lex the given string and return a LexResult instance holding an array of
+ * Token instances. For supported options, see Prism::parse.
+ */
+static VALUE
+lex(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+    // Only the tokens are wanted, so no nodes are returned.
+    VALUE lexed = parse_lex_input(&input, &options, false);
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    return lexed;
+}
+
+/**
+ * call-seq:
+ *   Prism::lex_file(filepath, **options) -> LexResult
+ *
+ * Return a LexResult instance that contains an array of Token instances
+ * corresponding to the given file. For supported options, see Prism::parse.
+ */
+static VALUE
+lex_file(int argc, VALUE *argv, VALUE self) {
+    pm_string_t input;
+    pm_options_t options = { 0 };
+
+    VALUE encoded_filepath;
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+
+    VALUE value = parse_lex_input(&input, &options, false);
+    pm_string_free(&input);
+    pm_options_free(&options);
+
+    // The options' filepath was set from the bytes of encoded_filepath. Keep
+    // the Ruby string reachable until lexing has finished, in case those bytes
+    // are borrowed rather than copied.
+    RB_GC_GUARD(encoded_filepath);
+
+    return value;
+}
+
+/******************************************************************************/
+/* Parsing Ruby code                                                          */
+/******************************************************************************/
+
+/**
+ * Parse the given input with the given options and build a ParseResult from
+ * the resulting AST. The C-side tree and parser are released before returning.
+ */
+static VALUE
+parse_input(pm_string_t *input, const pm_options_t *options) {
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+
+    pm_node_t *root = pm_parse(&parser);
+
+    // The encoding is only known for certain once parsing has finished.
+    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+
+    VALUE source = pm_source_new(&parser, encoding, options->freeze);
+    VALUE ast = pm_ast_new(&parser, root, encoding, source, options->freeze);
+    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, ast, encoding, source, options->freeze);
+
+    // The source is frozen last, after everything referring to it was built.
+    if (options->freeze) rb_obj_freeze(source);
+
+    pm_node_destroy(&parser, root);
+    pm_parser_free(&parser);
+
+    return result;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse(source, **options) -> ParseResult
+ *
+ * Parse the given string and return a ParseResult instance. The options that
+ * are supported are:
+ *
+ * * `command_line` - either nil or a string of the various options that were
+ *       set on the command line. Valid values are combinations of "a", "e",
+ *       "l", "n", "p", and "x".
+ * * `encoding` - the encoding of the source being parsed. This should be an
+ *       encoding or nil.
+ * * `filepath` - the filepath of the source being parsed. This should be a
+ *       string or nil.
+ * * `freeze` - whether or not to deeply freeze the AST. This should be a
+ *       boolean or nil.
+ * * `frozen_string_literal` - whether or not the frozen string literal pragma
+ *       has been set. This should be a boolean or nil.
+ * * `line` - the line number that the parse starts on. This should be an
+ *       integer or nil. Note that this is 1-indexed.
+ * * `main_script` - a boolean indicating whether or not the source being parsed
+ *       is the main script being run by the interpreter. This controls whether
+ *       or not shebangs are parsed for additional flags and whether or not the
+ *       parser will attempt to find a matching shebang if the first one does
+ *       not contain the word "ruby".
+ * * `partial_script` - when the file being parsed is considered a "partial"
+ *       script, jumps will not be marked as errors if they are not contained
+ *       within loops/blocks. This is used in the case that you're parsing a
+ *       script that you know will be embedded inside another script later, but
+ *       you do not have that context yet. For example, when parsing an ERB
+ *       template that will be evaluated inside another script.
+ * * `scopes` - the locals that are in scope surrounding the code that is being
+ *       parsed. This should be an array of arrays of symbols or nil. Scopes are
+ *       ordered from the outermost scope to the innermost one.
+ * * `version` - the version of Ruby syntax that prism should use to parse Ruby
+ *       code. By default prism assumes you want to parse with the latest
+ *       version of Ruby syntax (which you can trigger with `nil` or
+ *       `"latest"`). You may also restrict the syntax to a specific version of
+ *       Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that
+ *       the current Ruby is running use `version: "current"`. Raises
+ *       ArgumentError if the version is not currently supported by Prism.
+ */
+static VALUE
+parse(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+#ifdef PRISM_BUILD_DEBUG
+    // Debug builds parse a heap copy that is exactly as long as the source.
+    size_t source_length = pm_string_length(&input);
+    char *source_copy = xmalloc(source_length);
+    memcpy(source_copy, pm_string_source(&input), source_length);
+    pm_string_constant_init(&input, source_copy, source_length);
+#endif
+
+    VALUE result = parse_input(&input, &options);
+
+#ifdef PRISM_BUILD_DEBUG
+    xfree(source_copy);
+#endif
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return result;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_file(filepath, **options) -> ParseResult
+ *
+ * Parse the given file and return a ParseResult instance. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+parse_file(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    VALUE encoded_filepath;
+
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    VALUE result = parse_input(&input, &options);
+
+    // Release the native input and options before handing back the result.
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return result;
+}
+
+/**
+ * Parse the given input, then discard the tree without building Ruby objects.
+ */
+static void
+profile_input(pm_string_t *input, const pm_options_t *options) {
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+
+    // The tree is destroyed straight away; only the parsing work matters here.
+    pm_node_destroy(&parser, pm_parse(&parser));
+    pm_parser_free(&parser);
+}
+
+/**
+ * call-seq:
+ *   Prism::profile(source, **options) -> nil
+ *
+ * Parse the given string and return nothing. This method is meant to allow
+ * profilers to avoid the overhead of reifying the AST to Ruby. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+profile(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+    // Only the parse itself is of interest here, so nothing is reified.
+    profile_input(&input, &options);
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return Qnil;
+}
+
+/**
+ * call-seq:
+ *   Prism::profile_file(filepath, **options) -> nil
+ *
+ * Parse the given file and return nothing. This method is meant to allow
+ * profilers to avoid the overhead of reifying the AST to Ruby. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+profile_file(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    VALUE encoded_filepath;
+
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    profile_input(&input, &options);
+
+    // Nothing was reified, so only the native input and options need freeing.
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return Qnil;
+}
+
+static int
+parse_stream_eof(void *stream) {
+    // Test the Ruby truthiness of eof? rather than the raw VALUE bits, since a
+    // nil answer is a non-zero VALUE and would otherwise be read as "at EOF".
+    VALUE at_eof = rb_funcall((VALUE) stream, rb_intern("eof?"), 0);
+    return RTEST(at_eof) ? 1 : 0;
+}
+
+/**
+ * An fgets-alike for Ruby IO objects: copies one gets(size - 1) line to string.
+ */
+static char *
+parse_stream_fgets(char *string, int size, void *stream) {
+    RUBY_ASSERT(size > 0);
+
+    VALUE line = rb_funcall((VALUE) stream, rb_intern("gets"), 1, INT2FIX(size - 1));
+    if (NIL_P(line)) {
+        return NULL;
+    }
+
+    const char *cstr = RSTRING_PTR(line);
+    long length = RSTRING_LEN(line);
+    // NOTE(review): length is never checked against size -- confirm gets obeys it.
+    memcpy(string, cstr, length);
+    string[length] = '\0';
+
+    return string;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_stream(stream, **options) -> ParseResult
+ *
+ * Parse the given object that responds to `gets` and return a ParseResult
+ * instance. The options that are supported are the same as Prism::parse.
+ */
+static VALUE
+parse_stream(int argc, VALUE *argv, VALUE self) {
+    VALUE stream;
+    VALUE keywords;
+    rb_scan_args(argc, argv, "1:", &stream, &keywords);
+
+    pm_options_t options = { 0 };
+    extract_options(&options, Qnil, keywords);
+
+    pm_parser_t parser;
+    pm_buffer_t buffer;
+    pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, parse_stream_eof, &options);
+    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+
+    VALUE source = pm_source_new(&parser, encoding, options.freeze);
+    VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze);
+    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze);
+
+    // Free the options too: every other entry point releases them after use.
+    pm_node_destroy(&parser, node);
+    pm_buffer_free(&buffer);
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+    return result;
+}
+
+/**
+ * Parse the given input and return its comments as an array of Comment objects.
+ */
+static VALUE
+parse_input_comments(pm_string_t *input, const pm_options_t *options) {
+    const bool freeze = options->freeze;
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+    pm_node_t *root = pm_parse(&parser);
+
+    rb_encoding *enc = rb_enc_find(parser.encoding->name);
+    VALUE source = pm_source_new(&parser, enc, freeze);
+    VALUE result = parser_comments(&parser, source, freeze);
+
+    pm_node_destroy(&parser, root);
+    pm_parser_free(&parser);
+    return result;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_comments(source, **options) -> Array
+ *
+ * Parse the given string and return an array of Comment objects. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+parse_comments(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+    VALUE comments = parse_input_comments(&input, &options);
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return comments;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_file_comments(filepath, **options) -> Array
+ *
+ * Parse the given file and return an array of Comment objects. For supported
+ * options, see Prism::parse.
+ */
+static VALUE
+parse_file_comments(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    VALUE encoded_filepath;
+
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    VALUE comments = parse_input_comments(&input, &options);
+
+    // The comments are Ruby objects now; the native input can be let go.
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return comments;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_lex(source, **options) -> ParseLexResult
+ *
+ * Parse the given string and return a ParseLexResult instance that contains a
+ * 2-element array, where the first element is the AST and the second element is
+ * an array of Token instances.
+ *
+ * This API is only meant to be used in the case where you need both the AST and
+ * the tokens. If you only need one or the other, use either Prism::parse or
+ * Prism::lex.
+ *
+ * For supported options, see Prism::parse.
+ */
+static VALUE
+parse_lex(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+    VALUE result = parse_lex_input(&input, &options, true);
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return result;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_lex_file(filepath, **options) -> ParseLexResult
+ *
+ * Parse the given file and return a ParseLexResult instance that contains a
+ * 2-element array, where the first element is the AST and the second element is
+ * an array of Token instances.
+ *
+ * This API is only meant to be used in the case where you need both the AST and
+ * the tokens. If you only need one or the other, use either Prism::parse_file
+ * or Prism::lex_file.
+ *
+ * For supported options, see Prism::parse.
+ */
+static VALUE
+parse_lex_file(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    VALUE encoded_filepath;
+
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    VALUE result = parse_lex_input(&input, &options, true);
+
+    // Both the AST and the tokens were requested (third argument is true).
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return result;
+}
+
+/**
+ * Parse the given input and report, as Qtrue or Qfalse, whether the parser's
+ * error list stayed empty.
+ */
+static VALUE
+parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+    pm_node_destroy(&parser, pm_parse(&parser));
+
+    // The error count must be read before the parser is torn down.
+    const bool clean = (parser.error_list.size == 0);
+    pm_parser_free(&parser);
+
+    return clean ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_success?(source, **options) -> bool
+ *
+ * Parse the given string and return true if it parses without errors. For
+ * supported options, see Prism::parse.
+ */
+static VALUE
+parse_success_p(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    string_options(argc, argv, &input, &options);
+
+    VALUE succeeded = parse_input_success_p(&input, &options);
+
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return succeeded;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_failure?(source, **options) -> bool
+ *
+ * Parse the given string and return true if it parses with errors, i.e. the
+ * negation of Prism::parse_success?. For supported options, see Prism::parse.
+ */
+static VALUE
+parse_failure_p(int argc, VALUE *argv, VALUE self) {
+    return !RTEST(parse_success_p(argc, argv, self)) ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_file_success?(filepath, **options) -> bool
+ *
+ * Parse the given file and return true if it parses without errors. For
+ * supported options, see Prism::parse.
+ */
+static VALUE
+parse_file_success_p(int argc, VALUE *argv, VALUE self) {
+    pm_options_t options = { 0 };
+    pm_string_t input;
+    VALUE encoded_filepath;
+
+    file_options(argc, argv, &input, &options, &encoded_filepath);
+    VALUE succeeded = parse_input_success_p(&input, &options);
+
+    // The verdict is already a Ruby boolean, so the native data can be freed.
+    pm_string_free(&input);
+    pm_options_free(&options);
+    return succeeded;
+}
+
+/**
+ * call-seq:
+ *   Prism::parse_file_failure?(filepath, **options) -> bool
+ *
+ * Parse the given file and return true if it parses with errors (the inverse
+ * of Prism::parse_file_success?). For supported options, see Prism::parse.
+ */
+static VALUE
+parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
+    return !RTEST(parse_file_success_p(argc, argv, self)) ? Qtrue : Qfalse;
+}
+
+/******************************************************************************/
+/* String query methods                                                       */
+/******************************************************************************/
+
+/**
+ * Translate a pm_string_query_t into a Ruby boolean, raising ArgumentError when
+ * the query itself reported an error.
+ */
+static VALUE
+string_query(pm_string_query_t result) {
+    if (result == PM_STRING_QUERY_ERROR) {
+        rb_raise(rb_eArgError, "Invalid or non ascii-compatible encoding");
+    }
+
+    // Anything other than an explicit TRUE (including values outside of the
+    // enum) maps to false.
+    if (result == PM_STRING_QUERY_TRUE) {
+        return Qtrue;
+    }
+    return Qfalse;
+}
+
+/**
+ * call-seq:
+ *   Prism::StringQuery::local?(string) -> bool
+ *
+ * Returns true if the string constitutes a valid local variable name. Note that
+ * this means the names that can be set through Binding#local_variable_set, not
+ * necessarily the ones that can be set through a local variable assignment.
+ */
+static VALUE
+string_query_local_p(VALUE self, VALUE string) {
+    const uint8_t *bytes = (const uint8_t *) check_string(string);
+    return string_query(pm_string_query_local(bytes, (size_t) RSTRING_LEN(string), rb_enc_get(string)->name));
+}
+
+/**
+ * call-seq:
+ *   Prism::StringQuery::constant?(string) -> bool
+ *
+ * Returns true if the string constitutes a valid constant name. Note that this
+ * means the names that can be set through Module#const_set, not necessarily the
+ * ones that can be set through a constant assignment.
+ */
+static VALUE
+string_query_constant_p(VALUE self, VALUE string) {
+    const uint8_t *bytes = (const uint8_t *) check_string(string);
+    return string_query(pm_string_query_constant(bytes, (size_t) RSTRING_LEN(string), rb_enc_get(string)->name));
+}
+
+/**
+ * call-seq:
+ *   Prism::StringQuery::method_name?(string) -> bool
+ *
+ * Returns true if the string constitutes a valid method name.
+ */
+static VALUE
+string_query_method_name_p(VALUE self, VALUE string) {
+    const uint8_t *bytes = (const uint8_t *) check_string(string);
+    return string_query(pm_string_query_method_name(bytes, (size_t) RSTRING_LEN(string), rb_enc_get(string)->name));
+}
+
+/******************************************************************************/
+/* Initialization of the extension                                            */
+/******************************************************************************/
+
+/**
+ * The init function that Ruby calls when loading this extension.
+ */
+RUBY_FUNC_EXPORTED void
+Init_prism(void) {
+    // Refuse to load when the linked prism library reports a different version
+    // than the one this extension expects (EXPECTED_PRISM_VERSION).
+    if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) {
+        rb_raise(
+            rb_eRuntimeError,
+            "The prism library version (%s) does not match the expected version (%s)",
+            pm_version(),
+            EXPECTED_PRISM_VERSION
+        );
+    }
+
+#ifdef HAVE_RB_EXT_RACTOR_SAFE
+    // Mark this extension as Ractor-safe.
+    rb_ext_ractor_safe(true);
+#endif
+
+    // Define the Prism module and the classes under it, keeping a reference to
+    // each one for use throughout this extension.
+    rb_cPrism = rb_define_module("Prism");
+    rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject);
+    rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject);
+    rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
+    rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
+    rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
+    rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment);
+    rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment);
+    rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
+    rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
+    rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
+    rb_cPrismResult = rb_define_class_under(rb_cPrism, "Result", rb_cObject);
+    rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
+    rb_cPrismLexResult = rb_define_class_under(rb_cPrism, "LexResult", rb_cPrismResult);
+    rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);
+    rb_cPrismStringQuery = rb_define_class_under(rb_cPrism, "StringQuery", rb_cObject);
+    rb_cPrismScope = rb_define_class_under(rb_cPrism, "Scope", rb_cObject);
+
+    rb_cPrismCurrentVersionError = rb_const_get(rb_cPrism, rb_intern("CurrentVersionError"));
+
+    // Intern the option keyword IDs and the forwarding IDs once, up front, so
+    // that they do not have to be looked up again on every parse.
+    rb_id_option_command_line = rb_intern_const("command_line");
+    rb_id_option_encoding = rb_intern_const("encoding");
+    rb_id_option_filepath = rb_intern_const("filepath");
+    rb_id_option_freeze = rb_intern_const("freeze");
+    rb_id_option_frozen_string_literal = rb_intern_const("frozen_string_literal");
+    rb_id_option_line = rb_intern_const("line");
+    rb_id_option_main_script = rb_intern_const("main_script");
+    rb_id_option_partial_script = rb_intern_const("partial_script");
+    rb_id_option_scopes = rb_intern_const("scopes");
+    rb_id_option_version = rb_intern_const("version");
+    rb_id_source_for = rb_intern("for");
+    rb_id_forwarding_positionals = rb_intern("*");
+    rb_id_forwarding_keywords = rb_intern("**");
+    rb_id_forwarding_block = rb_intern("&");
+    rb_id_forwarding_all = rb_intern("...");
+
+    /**
+     * The version of the prism library.
+     */
+    rb_define_const(rb_cPrism, "VERSION", rb_str_freeze(rb_str_new_cstr(EXPECTED_PRISM_VERSION)));
+
+    // Singleton methods on Prism for lexing and parsing; -1 means variadic.
+    rb_define_singleton_method(rb_cPrism, "lex", lex, -1);
+    rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1);
+    rb_define_singleton_method(rb_cPrism, "parse", parse, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1);
+    rb_define_singleton_method(rb_cPrism, "profile", profile, -1);
+    rb_define_singleton_method(rb_cPrism, "profile_file", profile_file, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_stream", parse_stream, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_failure?", parse_failure_p, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1);
+    rb_define_singleton_method(rb_cPrism, "parse_file_failure?", parse_file_failure_p, -1);
+
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+    rb_define_singleton_method(rb_cPrism, "dump", dump, -1);
+    rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1);
+#endif
+
+    rb_define_singleton_method(rb_cPrismStringQuery, "local?", string_query_local_p, 1);
+    rb_define_singleton_method(rb_cPrismStringQuery, "constant?", string_query_constant_p, 1);
+    rb_define_singleton_method(rb_cPrismStringQuery, "method_name?", string_query_method_name_p, 1);
+
+    // Finally, run the init functions of the node and pack APIs.
+    Init_prism_api_node();
+    Init_prism_pack();
+}
diff --git a/prism/extension.h b/prism/extension.h
new file mode 100644
index 0000000000..510faa48e8
--- /dev/null
+++ b/prism/extension.h
@@ -0,0 +1,19 @@
+#ifndef PRISM_EXT_NODE_H
+#define PRISM_EXT_NODE_H
+
+#define EXPECTED_PRISM_VERSION "1.8.0"
+
+#include <ruby.h>
+#include <ruby/encoding.h>
+#include "prism.h"
+
+VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze);
+VALUE pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze);
+VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze);
+VALUE pm_integer_new(const pm_integer_t *integer);
+
+void Init_prism_api_node(void);
+void Init_prism_pack(void);
+RUBY_FUNC_EXPORTED void Init_prism(void);
+
+#endif
diff --git a/prism/node.h b/prism/node.h
new file mode 100644
index 0000000000..e8686a327c
--- /dev/null
+++ b/prism/node.h
@@ -0,0 +1,129 @@
+/**
+ * @file node.h
+ *
+ * Functions related to nodes in the AST.
+ */
+#ifndef PRISM_NODE_H
+#define PRISM_NODE_H
+
+#include "prism/defines.h"
+#include "prism/parser.h"
+#include "prism/util/pm_buffer.h"
+
+/**
+ * Loop through each node in the node list, writing each node to the given
+ * pm_node_t pointer. Declares index itself; stops early at a NULL entry.
+ */
+#define PM_NODE_LIST_FOREACH(list, index, node) \
+    for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
+
+/**
+ * Append a new node onto the end of the node list.
+ *
+ * @param list The list to append to.
+ * @param node The node to append.
+ */
+void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
+
+/**
+ * Prepend a new node onto the beginning of the node list.
+ *
+ * @param list The list to prepend to.
+ * @param node The node to prepend.
+ */
+void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
+
+/**
+ * Concatenate the given node list onto the end of the other node list.
+ *
+ * @param list The list to concatenate onto.
+ * @param other The list to concatenate.
+ */
+void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);
+
+/**
+ * Free the internal memory associated with the given node list.
+ *
+ * @param list The list to free.
+ */
+void pm_node_list_free(pm_node_list_t *list);
+
+/**
+ * Deallocate a node and all of its children.
+ *
+ * @param parser The parser that owns the node.
+ * @param node The node to deallocate.
+ */
+PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
+
+/**
+ * Returns a string representation of the given node type.
+ *
+ * @param node_type The node type to convert to a string.
+ * @return A string representation of the given node type.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
+
+/**
+ * Visit each of the nodes in this subtree using the given visitor callback. The
+ * callback function will be called for each node in the subtree. If it returns
+ * false, then that node's children will not be visited. If it returns true,
+ * then the children will be visited. The data parameter is treated as an opaque
+ * pointer and is passed to the visitor callback for consumers to use as they
+ * see fit.
+ *
+ * As an example:
+ *
+ * ```c
+ * #include "prism.h"
+ *
+ * bool visit(const pm_node_t *node, void *data) {
+ *     size_t *indent = (size_t *) data;
+ *     for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout);
+ *     printf("%s\n", pm_node_type_to_str(node->type));
+ *
+ *     size_t next_indent = *indent + 1;
+ *     size_t *next_data = &next_indent;
+ *     pm_visit_child_nodes(node, visit, next_data);
+ *
+ *     return false;
+ * }
+ *
+ * int main(void) {
+ *     const char *source = "1 + 2; 3 + 4";
+ *     size_t size = strlen(source);
+ *
+ *     pm_parser_t parser;
+ *     pm_options_t options = { 0 };
+ *     pm_parser_init(&parser, (const uint8_t *) source, size, &options);
+ *
+ *     size_t indent = 0;
+ *     pm_node_t *node = pm_parse(&parser);
+ *
+ *     size_t *data = &indent;
+ *     pm_visit_node(node, visit, data);
+ *
+ *     pm_node_destroy(&parser, node);
+ *     pm_parser_free(&parser);
+ *     return EXIT_SUCCESS;
+ * }
+ * ```
+ *
+ * @param node The root node to start visiting from.
+ * @param visitor The callback to call for each node in the subtree.
+ * @param data An opaque pointer that is passed to the visitor callback.
+ */
+PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+
+/**
+ * Visit the children of the given node with the given callback. This is the
+ * default behavior for walking the tree that is called from pm_visit_node if
+ * the callback returns true.
+ *
+ * @param node The node to visit the children of.
+ * @param visitor The callback to call for each child node.
+ * @param data An opaque pointer that is passed to the visitor callback.
+ */
+PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+
+#endif
diff --git a/prism/options.c b/prism/options.c
new file mode 100644
index 0000000000..09d2a65a6c
--- /dev/null
+++ b/prism/options.c
@@ -0,0 +1,338 @@
+#include "prism/options.h"
+
+/**
+ * Store the shebang callback and its opaque data pointer on the options struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) {
+    options->shebang_callback_data = shebang_callback_data;
+    options->shebang_callback = shebang_callback;
+}
+
+/**
+ * Set the filepath option; filepath must be a non-NULL, NUL-terminated string.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_filepath_set(pm_options_t *options, const char *filepath) {
+    pm_string_constant_init(&options->filepath, filepath, strlen(filepath));
+}
+
+/**
+ * Set the encoding option; encoding must be a non-NULL, NUL-terminated string.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_encoding_set(pm_options_t *options, const char *encoding) {
+    pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
+}
+
+/**
+ * Set the encoding_locked option on the given options struct to the given flag.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
+    options->encoding_locked = encoding_locked;
+}
+
+/**
+ * Set the line option on the given options struct to the given 32-bit value.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_line_set(pm_options_t *options, int32_t line) {
+    options->line = line;
+}
+
+/**
+ * Set the frozen string literal option to ENABLED (true) or DISABLED (false).
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) {
+    options->frozen_string_literal = frozen_string_literal ? PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED : PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
+}
+
+/**
+ * Set the command line option (one uint8_t) on the given options struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_command_line_set(pm_options_t *options, uint8_t command_line) {
+    options->command_line = command_line;
+}
+
+/**
+ * Checks if the slice is all decimal digits (an empty one presumably passes).
+ */
+static inline bool
+is_number(const char *string, size_t length) {
+    return pm_strspn_decimal_digit((const uint8_t *) string, (ptrdiff_t) length) == length;
+}
+
+/**
+ * Set the version option on the given options struct by parsing the given
+ * string, which is exactly length bytes long and need not be NUL-terminated.
+ * A NULL version selects the latest syntax. Returns false when unrecognized.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_options_version_set(pm_options_t *options, const char *version, size_t length) {
+    if (version == NULL) {
+        options->version = PM_OPTIONS_VERSION_LATEST;
+        return true;
+    }
+
+    if (length == 3) {
+        if (strncmp(version, "3.3", 3) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
+            return true;
+        }
+
+        if (strncmp(version, "3.4", 3) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_3_4;
+            return true;
+        }
+
+        if (strncmp(version, "3.5", 3) == 0 || strncmp(version, "4.0", 3) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+            return true;
+        }
+
+        if (strncmp(version, "4.1", 3) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
+            return true;
+        }
+
+        return false;
+    }
+
+    // A "major.minor." prefix must be followed by at least one patch digit.
+    if (length >= 5 && is_number(version + 4, length - 4)) {
+        if (strncmp(version, "3.3.", 4) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
+            return true;
+        }
+
+        if (strncmp(version, "3.4.", 4) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_3_4;
+            return true;
+        }
+
+        if (strncmp(version, "3.5.", 4) == 0 || strncmp(version, "4.0.", 4) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+            return true;
+        }
+
+        if (strncmp(version, "4.1.", 4) == 0) {
+            options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
+            return true;
+        }
+    }
+
+    // Compare exactly length bytes so nothing past the slice is ever read.
+    if (length == 6 && strncmp(version, "latest", 6) == 0) {
+        options->version = PM_OPTIONS_VERSION_LATEST;
+        return true;
+    }
+
+    return false;
+}
+
+/**
+ * Set the main script option on the given options struct to the given flag.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_main_script_set(pm_options_t *options, bool main_script) {
+    options->main_script = main_script;
+}
+
+/**
+ * Set the partial script option on the given options struct to the given flag.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_partial_script_set(pm_options_t *options, bool partial_script) {
+    options->partial_script = partial_script;
+}
+
+/**
+ * Set the freeze option on the given options struct to the given flag.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_freeze_set(pm_options_t *options, bool freeze) {
+    options->freeze = freeze;
+}
+
+// For some reason, GCC analyzer thinks we're leaking allocated scopes and
+// locals here, even though we definitely aren't. This is a false positive.
+// Ideally we wouldn't need to suppress this.
+#if defined(__GNUC__) && (__GNUC__ >= 10)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wanalyzer-malloc-leak"
+#endif
+
+/**
+ * Allocate and zero out the scopes array on the given options struct. The
+ * count is only recorded on success, so a failed init leaves nothing to free.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
+    options->scopes = xcalloc(scopes_count, sizeof(pm_options_scope_t));
+    options->scopes_count = options->scopes != NULL ? scopes_count : 0;
+    return options->scopes != NULL || scopes_count == 0;
+}
+
+/**
+ * Return a pointer to the scope at the given (unchecked) index in the options.
+ */
+PRISM_EXPORTED_FUNCTION const pm_options_scope_t *
+pm_options_scope_get(const pm_options_t *options, size_t index) {
+    return options->scopes + index;
+}
+
+/**
+ * Create a new options scope struct. This will hold a set of locals that are in
+ * scope surrounding the code that is being parsed. An empty scope is valid.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) {
+    scope->locals = xcalloc(locals_count, sizeof(pm_string_t));
+    scope->locals_count = scope->locals != NULL ? locals_count : 0;
+    scope->forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE;
+    return scope->locals != NULL || locals_count == 0;
+}
+
+/**
+ * Return a pointer to the local at the given (unchecked) index in the scope.
+ */
+PRISM_EXPORTED_FUNCTION const pm_string_t *
+pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) {
+    return scope->locals + index;
+}
+
+/**
+ * Set the forwarding option (one uint8_t) on the given scope struct.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) {
+    scope->forwarding = forwarding;
+}
+
+/**
+ * Release everything the options own: the filepath and encoding strings, each
+ * scope's locals, and finally the scopes array itself.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_options_free(pm_options_t *options) {
+    pm_string_free(&options->filepath);
+    pm_string_free(&options->encoding);
+
+    for (size_t i = 0; i < options->scopes_count; i++) {
+        pm_options_scope_t *scope = options->scopes + i;
+        for (size_t j = 0; j < scope->locals_count; j++) {
+            pm_string_free(scope->locals + j);
+        }
+
+        xfree(scope->locals);
+    }
+
+    xfree(options->scopes);
+}
+
+/**
+ * Read a 32-bit unsigned integer from a pointer. This function is used to read
+ * the options that are passed into the parser from the Ruby implementation. The
+ * bytes are copied in native byte order, so any alignment is acceptable.
+ */
+static uint32_t
+pm_options_read_u32(const char *data) {
+    // memcpy is valid for aligned and unaligned pointers alike and, unlike
+    // dereferencing a casted pointer, neither discards the const qualifier nor
+    // risks violating the strict aliasing rules. Compilers typically lower
+    // this fixed-size copy to a single load.
+    uint32_t value;
+    memcpy(&value, data, sizeof(value));
+    return value;
+}
+
+/**
+ * Read a 32-bit signed integer from a pointer. This function is used to read
+ * the options that are passed into the parser from the Ruby implementation. The
+ * bytes are copied in native byte order, so any alignment is acceptable.
+ */
+static int32_t
+pm_options_read_s32(const char *data) {
+    // memcpy is valid for aligned and unaligned pointers alike and, unlike
+    // dereferencing a casted pointer, neither discards the const qualifier nor
+    // risks violating the strict aliasing rules. Compilers typically lower
+    // this fixed-size copy to a single load.
+    int32_t value;
+    memcpy(&value, data, sizeof(value));
+    return value;
+}
+
+/**
+ * Deserialize an options struct from the given binary string. This is used to
+ * pass options to the parser from an FFI call so that FFI consumers don't have
+ * to worry about the structure of our options structs. The data comes from Ruby
+ * implementation internals, so it is trusted and no bounds are checked. If a
+ * scope allocation fails, the scopes are released and the read stops early.
+ */
+void
+pm_options_read(pm_options_t *options, const char *data) {
+    options->line = 1; // default
+    if (data == NULL) return;
+
+    uint32_t filepath_length = pm_options_read_u32(data);
+    data += 4;
+
+    if (filepath_length > 0) {
+        pm_string_constant_init(&options->filepath, data, filepath_length);
+        data += filepath_length;
+    }
+
+    options->line = pm_options_read_s32(data);
+    data += 4;
+
+    uint32_t encoding_length = pm_options_read_u32(data);
+    data += 4;
+
+    if (encoding_length > 0) {
+        pm_string_constant_init(&options->encoding, data, encoding_length);
+        data += encoding_length;
+    }
+
+    options->frozen_string_literal = (int8_t) *data++;
+    options->command_line = (uint8_t) *data++;
+    options->version = (pm_options_version_t) *data++;
+    options->encoding_locked = ((uint8_t) *data++) > 0;
+    options->main_script = ((uint8_t) *data++) > 0;
+    options->partial_script = ((uint8_t) *data++) > 0;
+    options->freeze = ((uint8_t) *data++) > 0;
+
+    uint32_t scopes_count = pm_options_read_u32(data);
+    data += 4;
+    if (scopes_count == 0) return;
+
+    if (!pm_options_scopes_init(options, scopes_count)) {
+        options->scopes_count = 0; // keep a later pm_options_free off the NULL array
+        return;
+    }
+
+    for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) {
+        uint32_t locals_count = pm_options_read_u32(data);
+        data += 4;
+        pm_options_scope_t *scope = &options->scopes[scope_index];
+        if (!pm_options_scope_init(scope, locals_count) && locals_count > 0) {
+            scope->locals_count = 0; // locals is NULL here, the free loop must not index it
+            pm_options_free(options);
+            options->scopes = NULL; // the caller may call pm_options_free again
+            options->scopes_count = 0;
+            return;
+        }
+        pm_options_scope_forwarding_set(scope, (uint8_t) *data++);
+        for (size_t local_index = 0; local_index < locals_count; local_index++) {
+            uint32_t local_length = pm_options_read_u32(data);
+            data += 4;
+            pm_string_constant_init(&scope->locals[local_index], data, local_length);
+            data += local_length;
+        }
+    }
+}
+
+#if defined(__GNUC__) && (__GNUC__ >= 10)
+#pragma GCC diagnostic pop
+#endif
diff --git a/prism/options.h b/prism/options.h
new file mode 100644
index 0000000000..c00c7bf755
--- /dev/null
+++ b/prism/options.h
@@ -0,0 +1,488 @@
+/**
+ * @file options.h
+ *
+ * The options that can be passed to parsing.
+ */
+#ifndef PRISM_OPTIONS_H
+#define PRISM_OPTIONS_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_char.h"
+#include "prism/util/pm_string.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Frozen string literals are disabled: string literals should be mutable.
+ */
+#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED   ((int8_t) -1)
+
+/**
+ * String literals may be frozen or mutable depending on the implementation
+ * default.
+ */
+#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET      ((int8_t)  0)
+
+/**
+ * Frozen string literals are enabled: string literals should be frozen.
+ */
+#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED    ((int8_t)  1)
+
+/**
+ * A scope of locals surrounding the code that is being parsed.
+ */
+typedef struct pm_options_scope {
+    /** The number of entries in the locals array. */
+    size_t locals_count;
+
+    /** The names of the locals in the scope (locals_count entries). */
+    pm_string_t *locals;
+
+    /** Bitset of PM_OPTIONS_SCOPE_FORWARDING_* flags set for this scope. */
+    uint8_t forwarding;
+} pm_options_scope_t;
+
+/** The default value: no forwarding flag is set on the scope. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_NONE = 0x0;
+
+/** When the scope is forwarding with the * parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS = 0x1;
+
+/** When the scope is forwarding with the ** parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS = 0x2;
+
+/** When the scope is forwarding with the & parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4;
+
+/** When the scope is forwarding with the ... parameter. */
+static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8;
+
+// Forward declaration needed by the callback typedef.
+struct pm_options;
+
+/**
+ * The callback called when additional switches are found in a shebang comment
+ * that need to be processed by the runtime.
+ *
+ * @param options The options struct that may be updated by this callback.
+ *   Certain fields will be checked for changes, specifically encoding,
+ *   command_line, and frozen_string_literal.
+ * @param source The source of the shebang comment.
+ * @param length The length of the source.
+ * @param shebang_callback_data Any additional data that should be passed along
+ *   to the callback.
+ */
+typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
+
+/**
+ * The version of Ruby syntax that we should be parsing with. This is used to
+ * allow consumers to specify which behavior they want in case they need to
+ * parse in the same way as a specific version of CRuby would have.
+ */
+typedef enum {
+    /** If an explicit version is not provided, the current version of prism will be used. */
+    PM_OPTIONS_VERSION_UNSET = 0,
+
+    /** The vendored version of prism in CRuby 3.3.x. */
+    PM_OPTIONS_VERSION_CRUBY_3_3 = 1,
+
+    /** The vendored version of prism in CRuby 3.4.x. */
+    PM_OPTIONS_VERSION_CRUBY_3_4 = 2,
+
+    /** Alias sharing its value with PM_OPTIONS_VERSION_CRUBY_4_0 (CRuby 4.0.x). */
+    PM_OPTIONS_VERSION_CRUBY_3_5 = 3,
+
+    /** The vendored version of prism in CRuby 4.0.x. */
+    PM_OPTIONS_VERSION_CRUBY_4_0 = 3,
+
+    /** The vendored version of prism in CRuby 4.1.x. */
+    PM_OPTIONS_VERSION_CRUBY_4_1 = 4,
+
+    /** The current version of prism. */
+    PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1
+} pm_options_version_t;
+
+/**
+ * The options that can be passed to the parser.
+ */
+typedef struct pm_options {
+    /**
+     * The callback to call when additional switches are found in a shebang
+     * comment.
+     */
+    pm_options_shebang_callback_t shebang_callback;
+
+    /**
+     * Any additional data that should be passed along to the shebang callback
+     * if one was set.
+     */
+    void *shebang_callback_data;
+
+    /** The name of the file that is currently being parsed. */
+    pm_string_t filepath;
+
+    /**
+     * The line within the file that the parse starts on. This value is
+     * 1-indexed.
+     */
+    int32_t line;
+
+    /**
+     * The name of the encoding that the source file is in. Note that this must
+     * correspond to a name that can be found with Encoding.find in Ruby.
+     */
+    pm_string_t encoding;
+
+    /**
+     * The number of scopes surrounding the code that is being parsed.
+     */
+    size_t scopes_count;
+
+    /**
+     * The scopes surrounding the code that is being parsed. For most parses
+     * this will be NULL, but for evals it will be the locals that are in scope
+     * surrounding the eval. Scopes are ordered from the outermost scope to the
+     * innermost one.
+     */
+    pm_options_scope_t *scopes;
+
+    /**
+     * The version of prism that we should be parsing with. This is used to
+     * allow consumers to specify which behavior they want in case they need to
+     * parse exactly as a specific version of CRuby.
+     */
+    pm_options_version_t version;
+
+    /** A bitset of the various options that were set on the command line. */
+    uint8_t command_line;
+
+    /**
+     * The state of the frozen string literal option.
+     * May be:
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED (-1)
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED (1)
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET (0)
+     */
+    int8_t frozen_string_literal;
+
+    /**
+     * Whether or not the encoding magic comments should be respected. This is a
+     * niche use-case where you want to parse a file with a specific encoding
+     * but ignore any encoding magic comments at the top of the file.
+     */
+    bool encoding_locked;
+
+    /**
+     * When the file being parsed is the main script, the shebang will be
+     * considered for command-line flags (or for implicit -x). The caller needs
+     * to pass this information to the parser so that it can behave correctly.
+     */
+    bool main_script;
+
+    /**
+     * When the file being parsed is considered a "partial" script, jumps will
+     * not be marked as errors if they are not contained within loops/blocks.
+     * This is used in the case that you're parsing a script that you know will
+     * be embedded inside another script later, but you do not have that context
+     * yet. For example, when parsing an ERB template that will be evaluated
+     * inside another script.
+     */
+    bool partial_script;
+
+    /**
+     * Whether or not the parser should freeze the nodes that it creates. This
+     * makes it possible to have a deeply frozen AST that is safe to share
+     * between concurrency primitives.
+     */
+    bool freeze;
+} pm_options_t;
+
+/**
+ * A bit representing whether or not the command line -a option was set. -a
+ * splits the input line $_ into $F.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_A = 0x1;
+
+/**
+ * A bit representing whether or not the command line -e option was set. -e
+ * allows the user to specify a script to be executed. This is necessary for
+ * prism to know because certain warnings are not generated when -e is used.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_E = 0x2;
+
+/**
+ * A bit representing whether or not the command line -l option was set. -l
+ * chomps the input line by default.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_L = 0x4;
+
+/**
+ * A bit representing whether or not the command line -n option was set. -n
+ * wraps the script in a while gets loop.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_N = 0x8;
+
+/**
+ * A bit representing whether or not the command line -p option was set. -p
+ * prints the value of $_ at the end of each loop.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
+
+/**
+ * A bit representing whether or not the command line -x option was set. -x
+ * searches the input file for a shebang that matches the current Ruby engine.
+ */
+static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
+
+/**
+ * Set the shebang callback option on the given options struct.
+ *
+ * @param options The options struct to set the shebang callback on.
+ * @param shebang_callback The shebang callback to set.
+ * @param shebang_callback_data Any additional data that should be passed along
+ *   to the callback.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
+
+/**
+ * Set the filepath option on the given options struct.
+ *
+ * @param options The options struct to set the filepath on.
+ * @param filepath The filepath to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath);
+
+/**
+ * Set the line option on the given options struct.
+ *
+ * @param options The options struct to set the line on.
+ * @param line The line to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
+
+/**
+ * Set the encoding option on the given options struct.
+ *
+ * @param options The options struct to set the encoding on.
+ * @param encoding The encoding to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
+
+/**
+ * Set the encoding_locked option on the given options struct.
+ *
+ * @param options The options struct to set the encoding_locked value on.
+ * @param encoding_locked The encoding_locked value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
+
+/**
+ * Set the frozen string literal option on the given options struct.
+ *
+ * @param options The options struct to set the frozen string literal value on.
+ * @param frozen_string_literal The frozen string literal value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
+
+/**
+ * Sets the command line option on the given options struct.
+ *
+ * @param options The options struct to set the command line option on.
+ * @param command_line The command_line value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line);
+
+/**
+ * Set the version option on the given options struct by parsing the given
+ * string. If the string contains an invalid option, this returns false.
+ * Otherwise, it returns true.
+ *
+ * @param options The options struct to set the version on.
+ * @param version The version to set.
+ * @param length The length of the version string.
+ * @return Whether or not the version was parsed successfully.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
+
+/**
+ * Set the main script option on the given options struct.
+ *
+ * @param options The options struct to set the main script value on.
+ * @param main_script The main script value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
+
+/**
+ * Set the partial script option on the given options struct.
+ *
+ * @param options The options struct to set the partial script value on.
+ * @param partial_script The partial script value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
+
+/**
+ * Set the freeze option on the given options struct.
+ *
+ * @param options The options struct to set the freeze value on.
+ * @param freeze The freeze value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze);
+
+/**
+ * Allocate and zero out the scopes array on the given options struct.
+ *
+ * @param options The options struct to initialize the scopes array on.
+ * @param scopes_count The number of scopes to allocate.
+ * @return Whether or not the scopes array was initialized successfully.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
+
+/**
+ * Return a pointer to the scope at the given index within the given options.
+ *
+ * @param options The options struct to get the scope from.
+ * @param index The index of the scope to get.
+ * @return A pointer to the scope at the given index.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index);
+
+/**
+ * Create a new options scope struct. This will hold a set of locals that are in
+ * scope surrounding the code that is being parsed.
+ *
+ * @param scope The scope struct to initialize.
+ * @param locals_count The number of locals to allocate.
+ * @return Whether or not the scope was initialized successfully.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
+
+/**
+ * Return a pointer to the local at the given index within the given scope.
+ *
+ * @param scope The scope struct to get the local from.
+ * @param index The index of the local to get.
+ * @return A pointer to the local at the given index.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index);
+
+/**
+ * Set the forwarding option on the given scope struct.
+ *
+ * @param scope The scope struct to set the forwarding on.
+ * @param forwarding The forwarding value to set.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding);
+
+/**
+ * Free the internal memory associated with the options.
+ *
+ * @param options The options struct whose internal memory should be freed.
+ *
+ * \public \memberof pm_options
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
+
+/**
+ * Deserialize an options struct from the given binary string. This is used to
+ * pass options to the parser from an FFI call so that consumers of the library
+ * from an FFI perspective don't have to worry about the structure of our
+ * options structs. Since the source of these calls will be from Ruby
+ * implementation internals we assume it is from a trusted source.
+ *
+ * `data` is either NULL (only the default line of 1 is applied) or a pointer to
+ * well-formed data whose layout is the same every time and is described below:
+ *
+ * | # bytes | field                      |
+ * | ------- | -------------------------- |
+ * | `4`     | the length of the filepath |
+ * | ...     | the filepath bytes         |
+ * | `4`     | the line number            |
+ * | `4`     | the length of the encoding |
+ * | ...     | the encoding bytes         |
+ * | `1`     | frozen string literal      |
+ * | `1`     | the command line flags     |
+ * | `1`     | the version                |
+ * | `1`     | encoding locked            |
+ * | `1`     | main script                |
+ * | `1`     | partial script             |
+ * | `1`     | freeze                     |
+ * | `4`     | the number of scopes       |
+ * | ...     | the scopes                 |
+ *
+ * The version field is an enum, so it should be one of the following values:
+ *
+ * | value | version                   |
+ * | ----- | ------------------------- |
+ * | `0`   | use the latest version of prism |
+ * | `1`   | use the version of prism that is vendored in CRuby 3.3.x |
+ * | `2`   | use the version of prism that is vendored in CRuby 3.4.x |
+ * | `3`   | use the version of prism that is vendored in CRuby 4.0.x |
+ * | `4`   | use the version of prism that is vendored in CRuby 4.1.x |
+ *
+ * Each scope is laid out as follows:
+ *
+ * | # bytes | field                      |
+ * | ------- | -------------------------- |
+ * | `4`     | the number of locals       |
+ * | `1`     | the forwarding flags       |
+ * | ...     | the locals                 |
+ *
+ * Each local is laid out as follows:
+ *
+ * | # bytes | field                      |
+ * | ------- | -------------------------- |
+ * | `4`     | the length of the local    |
+ * | ...     | the local bytes            |
+ *
+ * Some additional things to note about this layout:
+ *
+ * * The filepath can have a length of 0, in which case it is an empty string.
+ * * The line number is 1-indexed; it is stored directly in the line option.
+ * * The encoding can have a length of 0, in which case we'll use the default
+ *   encoding (UTF-8). If it's not 0, it should correspond to a name of an
+ *   encoding that can be passed to `Encoding.find` in Ruby.
+ * * The frozen string literal is a signed byte: -1 (off), 0 (unset), or 1 (on).
+ * * The encoding locked, main script, partial script, and freeze fields are
+ *   booleans, so their values should be either 0 or 1.
+ * * The number of scopes can be 0.
+ *
+ * @param options The options struct to deserialize into.
+ * @param data The binary string to deserialize from.
+ */
+void pm_options_read(pm_options_t *options, const char *data);
+
+#endif
diff --git a/prism/pack.c b/prism/pack.c
new file mode 100644
index 0000000000..1388ca8a3b
--- /dev/null
+++ b/prism/pack.c
@@ -0,0 +1,509 @@
+#include "prism/pack.h"
+
+// We optionally support parsing String#pack templates. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_PACK define.
+#ifdef PRISM_EXCLUDE_PACK
+
+void pm_pack_parse(void) {}
+
+#else
+
+#include <stdbool.h>
+#include <errno.h>
+
+// Parse the decimal digits at *format, advancing past them. Sets errno to
+// ERANGE (and keeps consuming digits) if the value overflows a uintmax_t.
+static uintmax_t
+strtoumaxc(const char **format) {
+    uintmax_t value = 0;
+    for (; **format >= '0' && **format <= '9'; (*format)++) {
+        uintmax_t digit = (uintmax_t) (**format - '0');
+        if (value > (UINTMAX_MAX - digit) / 10) errno = ERANGE; // value * 10 + digit would wrap
+        value = value * 10 + digit;
+    }
+    return value;
+}
+
+PRISM_EXPORTED_FUNCTION pm_pack_result
+pm_pack_parse(
+    pm_pack_variant variant,
+    const char **format,
+    const char *format_end,
+    pm_pack_type *type,
+    pm_pack_signed *signed_type,
+    pm_pack_endian *endian,
+    pm_pack_size *size,
+    pm_pack_length_type *length_type,
+    uint64_t *length,
+    pm_pack_encoding *encoding
+) {
+    if (*encoding == PM_PACK_ENCODING_START) {
+        *encoding = PM_PACK_ENCODING_US_ASCII;
+    }
+
+    if (*format == format_end) {
+            *type = PM_PACK_END;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            *length_type = PM_PACK_LENGTH_NA;
+            return PM_PACK_OK;
+    }
+
+    *length_type = PM_PACK_LENGTH_FIXED;
+    *length = 1;
+    bool length_changed_allowed = true;
+
+    char directive = **format;
+    (*format)++;
+    switch (directive) {
+        case ' ':
+        case '\t':
+        case '\n':
+        case '\v':
+        case '\f':
+        case '\r':
+            *type = PM_PACK_SPACE;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            *length_type = PM_PACK_LENGTH_NA;
+            *length = 0;
+            return PM_PACK_OK;
+        case '#':
+            while ((*format < format_end) && (**format != '\n')) {
+                (*format)++;
+            }
+            *type = PM_PACK_COMMENT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            *length_type = PM_PACK_LENGTH_NA;
+            *length = 0;
+            return PM_PACK_OK;
+        case 'C':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_AGNOSTIC_ENDIAN;
+            *size = PM_PACK_SIZE_8;
+            break;
+        case 'S':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_16;
+            break;
+        case 'L':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            break;
+        case 'Q':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_64;
+            break;
+        case 'J':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_P;
+            break;
+        case 'c':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_SIGNED;
+            *endian = PM_PACK_AGNOSTIC_ENDIAN;
+            *size = PM_PACK_SIZE_8;
+            break;
+        case 's':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_SIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_16;
+            break;
+        case 'l':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_SIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            break;
+        case 'q':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_SIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_64;
+            break;
+        case 'j':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_SIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_P;
+            break;
+        case 'I':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_INT;
+            break;
+        case 'i':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_SIGNED;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_INT;
+            break;
+        case 'n':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_BIG_ENDIAN;
+            *size = PM_PACK_SIZE_16;
+            length_changed_allowed = false;
+            break;
+        case 'N':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_BIG_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            length_changed_allowed = false;
+            break;
+        case 'v':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_LITTLE_ENDIAN;
+            *size = PM_PACK_SIZE_16;
+            length_changed_allowed = false;
+            break;
+        case 'V':
+            *type = PM_PACK_INTEGER;
+            *signed_type = PM_PACK_UNSIGNED;
+            *endian = PM_PACK_LITTLE_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            length_changed_allowed = false;
+            break;
+        case 'U':
+            *type = PM_PACK_UTF8;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'w':
+            *type = PM_PACK_BER;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'D':
+        case 'd':
+            *type = PM_PACK_FLOAT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_64;
+            break;
+        case 'F':
+        case 'f':
+            *type = PM_PACK_FLOAT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_NATIVE_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            break;
+        case 'E':
+            *type = PM_PACK_FLOAT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_LITTLE_ENDIAN;
+            *size = PM_PACK_SIZE_64;
+            break;
+        case 'e':
+            *type = PM_PACK_FLOAT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_LITTLE_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            break;
+        case 'G':
+            *type = PM_PACK_FLOAT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_BIG_ENDIAN;
+            *size = PM_PACK_SIZE_64;
+            break;
+        case 'g':
+            *type = PM_PACK_FLOAT;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_BIG_ENDIAN;
+            *size = PM_PACK_SIZE_32;
+            break;
+        case 'A':
+            *type = PM_PACK_STRING_SPACE_PADDED;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'a':
+            *type = PM_PACK_STRING_NULL_PADDED;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'Z':
+            *type = PM_PACK_STRING_NULL_TERMINATED;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'B':
+            *type = PM_PACK_STRING_MSB;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'b':
+            *type = PM_PACK_STRING_LSB;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'H':
+            *type = PM_PACK_STRING_HEX_HIGH;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'h':
+            *type = PM_PACK_STRING_HEX_LOW;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'u':
+            *type = PM_PACK_STRING_UU;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'M':
+            *type = PM_PACK_STRING_MIME;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'm':
+            *type = PM_PACK_STRING_BASE64;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'P':
+            *type = PM_PACK_STRING_FIXED;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'p':
+            *type = PM_PACK_STRING_POINTER;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case '@':
+            *type = PM_PACK_MOVE;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'X':
+            *type = PM_PACK_BACK;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case 'x':
+            *type = PM_PACK_NULL;
+            *signed_type = PM_PACK_SIGNED_NA;
+            *endian = PM_PACK_ENDIAN_NA;
+            *size = PM_PACK_SIZE_NA;
+            break;
+        case '%':
+            return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
+        default:
+            return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
+    }
+
+    bool explicit_endian = false;
+
+    while (*format < format_end) {
+        switch (**format) {
+            case '_':
+            case '!':
+                (*format)++;
+                if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
+                    return PM_PACK_ERROR_BANG_NOT_ALLOWED;
+                }
+                switch (*size) {
+                    case PM_PACK_SIZE_SHORT:
+                    case PM_PACK_SIZE_INT:
+                    case PM_PACK_SIZE_LONG:
+                    case PM_PACK_SIZE_LONG_LONG:
+                        break;
+                    case PM_PACK_SIZE_16:
+                        *size = PM_PACK_SIZE_SHORT;
+                        break;
+                    case PM_PACK_SIZE_32:
+                        *size = PM_PACK_SIZE_LONG;
+                        break;
+                    case PM_PACK_SIZE_64:
+                        *size = PM_PACK_SIZE_LONG_LONG;
+                        break;
+                    case PM_PACK_SIZE_P:
+                        break;
+                    default:
+                        return PM_PACK_ERROR_BANG_NOT_ALLOWED;
+                }
+                break;
+            case '<':
+                (*format)++;
+                if (explicit_endian) {
+                    return PM_PACK_ERROR_DOUBLE_ENDIAN;
+                }
+                *endian = PM_PACK_LITTLE_ENDIAN;
+                explicit_endian = true;
+                break;
+            case '>':
+                (*format)++;
+                if (explicit_endian) {
+                    return PM_PACK_ERROR_DOUBLE_ENDIAN;
+                }
+                *endian = PM_PACK_BIG_ENDIAN;
+                explicit_endian = true;
+                break;
+            default:
+                goto exit_modifier_loop;
+        }
+    }
+
+exit_modifier_loop:
+
+    if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
+        *length = 0;
+    }
+
+    if (*format < format_end) {
+        if (**format == '*') {
+            switch (*type) {
+                case PM_PACK_NULL:
+                case PM_PACK_BACK:
+                    switch (variant) {
+                        case PM_PACK_VARIANT_PACK:
+                            *length_type = PM_PACK_LENGTH_FIXED;
+                            break;
+                        case PM_PACK_VARIANT_UNPACK:
+                            *length_type = PM_PACK_LENGTH_MAX;
+                            break;
+                    }
+                    *length = 0;
+                    break;
+
+                case PM_PACK_MOVE:
+                    switch (variant) {
+                        case PM_PACK_VARIANT_PACK:
+                            *length_type = PM_PACK_LENGTH_FIXED;
+                            break;
+                        case PM_PACK_VARIANT_UNPACK:
+                            *length_type = PM_PACK_LENGTH_RELATIVE;
+                            break;
+                    }
+                    *length = 0;
+                    break;
+
+                case PM_PACK_STRING_UU:
+                    *length_type = PM_PACK_LENGTH_FIXED;
+                    *length = 0;
+                    break;
+
+                case PM_PACK_STRING_FIXED:
+                    switch (variant) {
+                        case PM_PACK_VARIANT_PACK:
+                            *length_type = PM_PACK_LENGTH_FIXED;
+                            *length = 1;
+                            break;
+                        case PM_PACK_VARIANT_UNPACK:
+                            *length_type = PM_PACK_LENGTH_MAX;
+                            *length = 0;
+                            break;
+                    }
+                    break;
+
+                case PM_PACK_STRING_MIME:
+                case PM_PACK_STRING_BASE64:
+                    *length_type = PM_PACK_LENGTH_FIXED;
+                    *length = 1;
+                    break;
+
+                default:
+                    *length_type = PM_PACK_LENGTH_MAX;
+                    *length = 0;
+                    break;
+            }
+
+            (*format)++;
+        } else if (**format >= '0' && **format <= '9') {
+            errno = 0;
+            *length_type = PM_PACK_LENGTH_FIXED;
+            #if UINTMAX_MAX < UINT64_MAX
+                #error "prism's design assumes uintmax_t is at least as large as uint64_t"
+            #endif
+            uintmax_t length_max = strtoumaxc(format);
+            if (errno || length_max > UINT64_MAX) {
+                return PM_PACK_ERROR_LENGTH_TOO_BIG;
+            }
+            *length = (uint64_t) length_max;
+        }
+    }
+
+    switch (*type) {
+        case PM_PACK_UTF8:
+            /* if encoding is US-ASCII, upgrade to UTF-8 */
+            if (*encoding == PM_PACK_ENCODING_US_ASCII) {
+                *encoding = PM_PACK_ENCODING_UTF_8;
+            }
+            break;
+        case PM_PACK_STRING_MIME:
+        case PM_PACK_STRING_BASE64:
+        case PM_PACK_STRING_UU:
+            /* keep US-ASCII (do nothing) */
+            break;
+        default:
+            /* fall back to BINARY */
+            *encoding = PM_PACK_ENCODING_ASCII_8BIT;
+            break;
+    }
+
+    return PM_PACK_OK;
+}
+
+PRISM_EXPORTED_FUNCTION size_t
+pm_size_to_native(pm_pack_size size) {
+    // Width in bytes on this platform; stays 0 for PM_PACK_SIZE_NA and any
+    // value that is not a known size.
+    size_t width = 0;
+
+    switch (size) {
+        // Fixed-width sizes.
+        case PM_PACK_SIZE_8:  width = 1; break;
+        case PM_PACK_SIZE_16: width = 2; break;
+        case PM_PACK_SIZE_32: width = 4; break;
+        case PM_PACK_SIZE_64: width = 8; break;
+
+        // Sizes that follow the native C types.
+        case PM_PACK_SIZE_SHORT:     width = sizeof(short); break;
+        case PM_PACK_SIZE_INT:       width = sizeof(int); break;
+        case PM_PACK_SIZE_LONG:      width = sizeof(long); break;
+        case PM_PACK_SIZE_LONG_LONG: width = sizeof(long long); break;
+        case PM_PACK_SIZE_P:         width = sizeof(void *); break;
+
+        default: break;
+    }
+
+    return width;
+}
+
+#endif
diff --git a/prism/pack.h b/prism/pack.h
new file mode 100644
index 0000000000..0b0b4b19cc
--- /dev/null
+++ b/prism/pack.h
@@ -0,0 +1,163 @@
+/**
+ * @file pack.h
+ *
+ * A pack template string parser.
+ */
+#ifndef PRISM_PACK_H
+#define PRISM_PACK_H
+
+#include "prism/defines.h"
+
+// We optionally support parsing Array#pack and String#unpack templates. For
+// systems that don't want or need this functionality, it can be turned off
+// with the PRISM_EXCLUDE_PACK define.
+#ifdef PRISM_EXCLUDE_PACK
+
+void pm_pack_parse(void);
+
+#else
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/** The version of the pack template language that we are parsing. */
+typedef enum pm_pack_version {
+    PM_PACK_VERSION_3_2_0
+} pm_pack_version;
+
+/** The type of pack template we are parsing. */
+typedef enum pm_pack_variant {
+    PM_PACK_VARIANT_PACK,
+    PM_PACK_VARIANT_UNPACK
+} pm_pack_variant;
+
+/** A directive within the pack template. */
+typedef enum pm_pack_type {
+    PM_PACK_SPACE,
+    PM_PACK_COMMENT,
+    PM_PACK_INTEGER,
+    PM_PACK_UTF8,
+    PM_PACK_BER,
+    PM_PACK_FLOAT,
+    PM_PACK_STRING_SPACE_PADDED,
+    PM_PACK_STRING_NULL_PADDED,
+    PM_PACK_STRING_NULL_TERMINATED,
+    PM_PACK_STRING_MSB,
+    PM_PACK_STRING_LSB,
+    PM_PACK_STRING_HEX_HIGH,    // 'H'
+    PM_PACK_STRING_HEX_LOW,     // 'h'
+    PM_PACK_STRING_UU,          // 'u'
+    PM_PACK_STRING_MIME,        // 'M'
+    PM_PACK_STRING_BASE64,      // 'm'
+    PM_PACK_STRING_FIXED,       // 'P'
+    PM_PACK_STRING_POINTER,     // 'p'
+    PM_PACK_MOVE,               // '@'
+    PM_PACK_BACK,               // 'X'
+    PM_PACK_NULL,               // 'x'
+    PM_PACK_END
+} pm_pack_type;
+
+/** The signedness of a pack directive. */
+typedef enum pm_pack_signed {
+    PM_PACK_UNSIGNED,
+    PM_PACK_SIGNED,
+    PM_PACK_SIGNED_NA
+} pm_pack_signed;
+
+/** The endianness of a pack directive. */
+typedef enum pm_pack_endian {
+    PM_PACK_AGNOSTIC_ENDIAN,
+    PM_PACK_LITTLE_ENDIAN,      // aka 'VAX', or 'V'
+    PM_PACK_BIG_ENDIAN,         // aka 'network', or 'N'
+    PM_PACK_NATIVE_ENDIAN,
+    PM_PACK_ENDIAN_NA
+} pm_pack_endian;
+
+/** The size of an integer pack directive. */
+typedef enum pm_pack_size {
+    PM_PACK_SIZE_SHORT,         // native `short`
+    PM_PACK_SIZE_INT,           // native `int`
+    PM_PACK_SIZE_LONG,          // native `long`
+    PM_PACK_SIZE_LONG_LONG,     // native `long long`
+    PM_PACK_SIZE_8,             // 1 byte
+    PM_PACK_SIZE_16,            // 2 bytes
+    PM_PACK_SIZE_32,            // 4 bytes
+    PM_PACK_SIZE_64,            // 8 bytes
+    PM_PACK_SIZE_P,             // native pointer (`void *`)
+    PM_PACK_SIZE_NA             // directive has no size
+} pm_pack_size;
+
+/** How the length of a pack directive is specified. */
+typedef enum pm_pack_length_type {
+    PM_PACK_LENGTH_FIXED,     // a numeric count (also `*` on some directives)
+    PM_PACK_LENGTH_MAX,       // `*` for most directives
+    PM_PACK_LENGTH_RELATIVE,  // special case for unpack @*
+    PM_PACK_LENGTH_NA
+} pm_pack_length_type;
+
+/** The type of encoding for a pack template string. */
+typedef enum pm_pack_encoding {
+    PM_PACK_ENCODING_START,
+    PM_PACK_ENCODING_ASCII_8BIT,
+    PM_PACK_ENCODING_US_ASCII,
+    PM_PACK_ENCODING_UTF_8
+} pm_pack_encoding;
+
+/** The result of parsing a pack template. */
+typedef enum pm_pack_result {
+    PM_PACK_OK,
+    PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,  // the '%' directive
+    PM_PACK_ERROR_UNKNOWN_DIRECTIVE,      // an unrecognized directive character
+    PM_PACK_ERROR_LENGTH_TOO_BIG,         // count does not fit in a uint64_t
+    PM_PACK_ERROR_BANG_NOT_ALLOWED,       // '_' or '!' where it is not accepted
+    PM_PACK_ERROR_DOUBLE_ENDIAN           // more than one '<' or '>' modifier
+} pm_pack_result;
+
+/**
+ * Parse a single directive from a pack or unpack format string.
+ *
+ * @param variant (in) pack or unpack
+ * @param format (in, out) the start of the next directive to parse on calling,
+ *     and advanced beyond the parsed directive on return, or as much of it as
+ *     was consumed until an error was encountered
+ * @param format_end (in) the end of the format string
+ * @param type (out) the type of the directive
+ * @param signed_type (out) whether the value is signed
+ * @param endian (out) the endianness of the value
+ * @param size (out) the size of the value
+ * @param length_type (out) what kind of length is specified
+ * @param length (out) the length of the directive
+ * @param encoding (in, out) takes the current encoding of the string which
+ *     would result from parsing the whole format string, and returns a possibly
+ *     changed encoding - the encoding should be `PM_PACK_ENCODING_START` when
+ *     pm_pack_parse is called for the first directive in a format string
+ *
+ * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
+ * @note Consult Ruby documentation for the meaning of directives.
+ */
+PRISM_EXPORTED_FUNCTION pm_pack_result
+pm_pack_parse(
+    pm_pack_variant variant,
+    const char **format,
+    const char *format_end,
+    pm_pack_type *type,
+    pm_pack_signed *signed_type,
+    pm_pack_endian *endian,
+    pm_pack_size *size,
+    pm_pack_length_type *length_type,
+    uint64_t *length,
+    pm_pack_encoding *encoding
+);
+
+/**
+ * Prism abstracts sizes away from the native system - this converts an abstract
+ * size to a native size.
+ *
+ * @param size The abstract size to convert.
+ * @return The native size.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
+
+#endif
+
+#endif
diff --git a/prism/parser.h b/prism/parser.h
new file mode 100644
index 0000000000..95d7aac710
--- /dev/null
+++ b/prism/parser.h
@@ -0,0 +1,936 @@
+/**
+ * @file parser.h
+ *
+ * The parser used to parse Ruby source.
+ */
+#ifndef PRISM_PARSER_H
+#define PRISM_PARSER_H
+
+#include "prism/defines.h"
+#include "prism/ast.h"
+#include "prism/encoding.h"
+#include "prism/options.h"
+#include "prism/static_literals.h"
+#include "prism/util/pm_constant_pool.h"
+#include "prism/util/pm_list.h"
+#include "prism/util/pm_newline_list.h"
+#include "prism/util/pm_string.h"
+
+#include <stdbool.h>
+
+/**
+ * This enum provides various bits that represent different kinds of states that
+ * the lexer can track. This is used to determine which kind of token to return
+ * based on the context of the parser.
+ */
+typedef enum {
+    PM_LEX_STATE_BIT_BEG,
+    PM_LEX_STATE_BIT_END,
+    PM_LEX_STATE_BIT_ENDARG,
+    PM_LEX_STATE_BIT_ENDFN,
+    PM_LEX_STATE_BIT_ARG,
+    PM_LEX_STATE_BIT_CMDARG,
+    PM_LEX_STATE_BIT_MID,
+    PM_LEX_STATE_BIT_FNAME,
+    PM_LEX_STATE_BIT_DOT,
+    PM_LEX_STATE_BIT_CLASS,
+    PM_LEX_STATE_BIT_LABEL,
+    PM_LEX_STATE_BIT_LABELED,
+    PM_LEX_STATE_BIT_FITEM
+} pm_lex_state_bit_t;
+
+/**
+ * This enum combines the various bits from the above enum into individual
+ * values that represent the various states of the lexer.
+ */
+typedef enum {
+    PM_LEX_STATE_NONE = 0,
+    PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
+    PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
+    PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
+    PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
+    PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
+    PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
+    PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
+    PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
+    PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
+    PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
+    PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
+    PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
+    PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
+    PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
+    PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
+    PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
+} pm_lex_state_t;
+
+/**
+ * The type of quote that a heredoc uses.
+ */
+typedef enum {
+    PM_HEREDOC_QUOTE_NONE,
+    PM_HEREDOC_QUOTE_SINGLE = '\'',
+    PM_HEREDOC_QUOTE_DOUBLE = '"',
+    PM_HEREDOC_QUOTE_BACKTICK = '`',
+} pm_heredoc_quote_t;
+
+/**
+ * The type of indentation that a heredoc uses.
+ */
+typedef enum {
+    PM_HEREDOC_INDENT_NONE,
+    PM_HEREDOC_INDENT_DASH,
+    PM_HEREDOC_INDENT_TILDE,
+} pm_heredoc_indent_t;
+
+/**
+ * All of the information that needs to be stored in order to lex a heredoc.
+ */
+typedef struct {
+    /** A pointer to the start of the heredoc identifier. */
+    const uint8_t *ident_start;
+
+    /** The length of the heredoc identifier. */
+    size_t ident_length;
+
+    /** The type of quote that the heredoc uses. */
+    pm_heredoc_quote_t quote;
+
+    /** The type of indentation that the heredoc uses. */
+    pm_heredoc_indent_t indent;
+} pm_heredoc_lex_mode_t;
+
+/**
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
+ * kind of token it is currently lexing. For example, when we find the start of
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
+ * are found as part of a string.
+ */
+typedef struct pm_lex_mode {
+    /** The type of this lex mode. */
+    enum {
+        /** This state is used when any given token is being lexed. */
+        PM_LEX_DEFAULT,
+
+        /**
+         * This state is used when we're lexing as normal but inside an embedded
+         * expression of a string.
+         */
+        PM_LEX_EMBEXPR,
+
+        /**
+         * This state is used when we're lexing a variable that is embedded
+         * directly inside of a string with the # shorthand.
+         */
+        PM_LEX_EMBVAR,
+
+        /** This state is used when you are inside the content of a heredoc. */
+        PM_LEX_HEREDOC,
+
+        /**
+         * This state is used when we are lexing a list of tokens, as in a %w
+         * word list literal or a %i symbol list literal.
+         */
+        PM_LEX_LIST,
+
+        /**
+         * This state is used when a regular expression has been begun and we
+         * are looking for the terminator.
+         */
+        PM_LEX_REGEXP,
+
+        /**
+         * This state is used when we are lexing a string or a string-like
+         * token, as in string content with either quote or an xstring.
+         */
+        PM_LEX_STRING
+    } mode;
+
+    /** The data associated with this type of lex mode. */
+    union {
+        struct {
+            /** This keeps track of the nesting level of the list. */
+            size_t nesting;
+
+            /** Whether or not interpolation is allowed in this list. */
+            bool interpolation;
+
+            /**
+             * When lexing a list, it takes into account balancing the
+             * terminator if the terminator is one of (), [], {}, or <>.
+             */
+            uint8_t incrementor;
+
+            /** This is the terminator of the list literal. */
+            uint8_t terminator;
+
+            /**
+             * This is the character set that should be used to delimit the
+             * tokens within the list.
+             */
+            uint8_t breakpoints[11];
+        } list;
+
+        struct {
+            /**
+             * This keeps track of the nesting level of the regular expression.
+             */
+            size_t nesting;
+
+            /**
+             * When lexing a regular expression, it takes into account balancing
+             * the terminator if the terminator is one of (), [], {}, or <>.
+             */
+            uint8_t incrementor;
+
+            /** This is the terminator of the regular expression. */
+            uint8_t terminator;
+
+            /**
+             * This is the character set that should be used to delimit the
+             * tokens within the regular expression.
+             */
+            uint8_t breakpoints[7];
+        } regexp;
+
+        struct {
+            /** This keeps track of the nesting level of the string. */
+            size_t nesting;
+
+            /** Whether or not interpolation is allowed in this string. */
+            bool interpolation;
+
+            /**
+             * Whether or not at the end of the string we should allow a :,
+             * which would indicate this was a dynamic symbol instead of a
+             * string.
+             */
+            bool label_allowed;
+
+            /**
+             * When lexing a string, it takes into account balancing the
+             * terminator if the terminator is one of (), [], {}, or <>.
+             */
+            uint8_t incrementor;
+
+            /**
+             * This is the terminator of the string. It is typically either a
+             * single or double quote.
+             */
+            uint8_t terminator;
+
+            /**
+             * This is the character set that should be used to delimit the
+             * tokens within the string.
+             */
+            uint8_t breakpoints[7];
+        } string;
+
+        struct {
+            /**
+             * All of the data necessary to lex a heredoc.
+             */
+            pm_heredoc_lex_mode_t base;
+
+            /**
+             * This is the pointer to the character where lexing should resume
+             * once the heredoc has been completely processed.
+             */
+            const uint8_t *next_start;
+
+            /**
+             * This is used to track the amount of common whitespace on each
+             * line so that we know how much to dedent each line in the case of
+             * a tilde heredoc.
+             */
+            size_t *common_whitespace;
+
+            /** True if the previous token ended with a line continuation. */
+            bool line_continuation;
+        } heredoc;
+    } as;
+
+    /** The previous lex mode so that it knows how to pop. */
+    struct pm_lex_mode *prev;
+} pm_lex_mode_t;
+
+/**
+ * We pre-allocate a certain number of lex modes in order to avoid having to
+ * call malloc too many times while parsing. You really shouldn't need more than
+ * this because you only really nest deeply when doing string interpolation.
+ */
+#define PM_LEX_STACK_SIZE 4
+
+/**
+ * The parser used to parse Ruby source.
+ */
+typedef struct pm_parser pm_parser_t;
+
+/**
+ * While parsing, we keep track of a stack of contexts. This is helpful for
+ * error recovery so that we can pop back to a previous context when we hit a
+ * token that is understood by a parent context but not by the current context.
+ */
+typedef enum {
+    /** a null context, used for returning a value from a function */
+    PM_CONTEXT_NONE = 0,
+
+    /** a begin statement */
+    PM_CONTEXT_BEGIN,
+
+    /** an ensure statement with an explicit begin */
+    PM_CONTEXT_BEGIN_ENSURE,
+
+    /** a rescue else statement with an explicit begin */
+    PM_CONTEXT_BEGIN_ELSE,
+
+    /** a rescue statement with an explicit begin */
+    PM_CONTEXT_BEGIN_RESCUE,
+
+    /** expressions in block arguments using braces */
+    PM_CONTEXT_BLOCK_BRACES,
+
+    /** expressions in block arguments using do..end */
+    PM_CONTEXT_BLOCK_KEYWORDS,
+
+    /** an ensure statement within a do..end block */
+    PM_CONTEXT_BLOCK_ENSURE,
+
+    /** a rescue else statement within a do..end block */
+    PM_CONTEXT_BLOCK_ELSE,
+
+    /** expressions in block parameters `foo do |...| end` */
+    PM_CONTEXT_BLOCK_PARAMETERS,
+
+    /** a rescue statement within a do..end block */
+    PM_CONTEXT_BLOCK_RESCUE,
+
+    /** a case when statement */
+    PM_CONTEXT_CASE_WHEN,
+
+    /** a case in statement */
+    PM_CONTEXT_CASE_IN,
+
+    /** a class declaration */
+    PM_CONTEXT_CLASS,
+
+    /** an ensure statement within a class statement */
+    PM_CONTEXT_CLASS_ENSURE,
+
+    /** a rescue else statement within a class statement */
+    PM_CONTEXT_CLASS_ELSE,
+
+    /** a rescue statement within a class statement */
+    PM_CONTEXT_CLASS_RESCUE,
+
+    /** a method definition */
+    PM_CONTEXT_DEF,
+
+    /** an ensure statement within a method definition */
+    PM_CONTEXT_DEF_ENSURE,
+
+    /** a rescue else statement within a method definition */
+    PM_CONTEXT_DEF_ELSE,
+
+    /** a rescue statement within a method definition */
+    PM_CONTEXT_DEF_RESCUE,
+
+    /** a method definition's parameters */
+    PM_CONTEXT_DEF_PARAMS,
+
+    /** a defined? expression */
+    PM_CONTEXT_DEFINED,
+
+    /** a method definition's default parameter */
+    PM_CONTEXT_DEFAULT_PARAMS,
+
+    /** an else clause */
+    PM_CONTEXT_ELSE,
+
+    /** an elsif clause */
+    PM_CONTEXT_ELSIF,
+
+    /** an interpolated expression */
+    PM_CONTEXT_EMBEXPR,
+
+    /** a for loop */
+    PM_CONTEXT_FOR,
+
+    /** a for loop's index */
+    PM_CONTEXT_FOR_INDEX,
+
+    /** an if statement */
+    PM_CONTEXT_IF,
+
+    /** a lambda expression with braces */
+    PM_CONTEXT_LAMBDA_BRACES,
+
+    /** a lambda expression with do..end */
+    PM_CONTEXT_LAMBDA_DO_END,
+
+    /** an ensure statement within a lambda expression */
+    PM_CONTEXT_LAMBDA_ENSURE,
+
+    /** a rescue else statement within a lambda expression */
+    PM_CONTEXT_LAMBDA_ELSE,
+
+    /** a rescue statement within a lambda expression */
+    PM_CONTEXT_LAMBDA_RESCUE,
+
+    /** the predicate clause of a loop statement */
+    PM_CONTEXT_LOOP_PREDICATE,
+
+    /** the top level context */
+    PM_CONTEXT_MAIN,
+
+    /** a module declaration */
+    PM_CONTEXT_MODULE,
+
+    /** an ensure statement within a module statement */
+    PM_CONTEXT_MODULE_ENSURE,
+
+    /** a rescue else statement within a module statement */
+    PM_CONTEXT_MODULE_ELSE,
+
+    /** a rescue statement within a module statement */
+    PM_CONTEXT_MODULE_RESCUE,
+
+    /** a multiple target expression */
+    PM_CONTEXT_MULTI_TARGET,
+
+    /** a parenthesized expression */
+    PM_CONTEXT_PARENS,
+
+    /** an END block */
+    PM_CONTEXT_POSTEXE,
+
+    /** a predicate inside an if/elsif/unless statement */
+    PM_CONTEXT_PREDICATE,
+
+    /** a BEGIN block */
+    PM_CONTEXT_PREEXE,
+
+    /** a modifier rescue clause */
+    PM_CONTEXT_RESCUE_MODIFIER,
+
+    /** a singleton class definition */
+    PM_CONTEXT_SCLASS,
+
+    /** an ensure statement with a singleton class */
+    PM_CONTEXT_SCLASS_ENSURE,
+
+    /** a rescue else statement with a singleton class */
+    PM_CONTEXT_SCLASS_ELSE,
+
+    /** a rescue statement with a singleton class */
+    PM_CONTEXT_SCLASS_RESCUE,
+
+    /** a ternary expression */
+    PM_CONTEXT_TERNARY,
+
+    /** an unless statement */
+    PM_CONTEXT_UNLESS,
+
+    /** an until statement */
+    PM_CONTEXT_UNTIL,
+
+    /** a while statement */
+    PM_CONTEXT_WHILE,
+} pm_context_t;
+
+/** This is a node in a linked list of contexts. */
+typedef struct pm_context_node {
+    /** The context that this node represents. */
+    pm_context_t context;
+
+    /** A pointer to the previous context in the linked list. */
+    struct pm_context_node *prev;
+} pm_context_node_t;
+
+/** This is the type of a comment that we've found while parsing. */
+typedef enum {
+    PM_COMMENT_INLINE,
+    PM_COMMENT_EMBDOC
+} pm_comment_type_t;
+
+/**
+ * This is a node in the linked list of comments that we've found while parsing.
+ *
+ * @extends pm_list_node_t
+ */
+typedef struct pm_comment {
+    /** The embedded base node. */
+    pm_list_node_t node;
+
+    /** The location of the comment in the source. */
+    pm_location_t location;
+
+    /** The type of comment that we've found. */
+    pm_comment_type_t type;
+} pm_comment_t;
+
+/**
+ * This is a node in the linked list of magic comments that we've found while
+ * parsing.
+ *
+ * @extends pm_list_node_t
+ */
+typedef struct {
+    /** The embedded base node. */
+    pm_list_node_t node;
+
+    /** A pointer to the start of the key in the source. */
+    const uint8_t *key_start;
+
+    /** A pointer to the start of the value in the source. */
+    const uint8_t *value_start;
+
+    /** The length of the key in the source. */
+    uint32_t key_length;
+
+    /** The length of the value in the source. */
+    uint32_t value_length;
+} pm_magic_comment_t;
+
+/**
+ * When the encoding that is being used to parse the source is changed by prism,
+ * we provide the ability here to call out to a user-defined function.
+ */
+typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
+
+/**
+ * When you are lexing through a file, the lexer needs all of the information
+ * that the parser additionally provides (for example, the local table). So if
+ * you want to properly lex Ruby, you need to actually lex it in the context of
+ * the parser. In order to provide this functionality, we optionally allow a
+ * struct to be attached to the parser that calls back out to a user-provided
+ * callback when each token is lexed.
+ */
+typedef struct {
+    /**
+     * This opaque pointer is used to provide whatever information the user
+     * deemed necessary to the callback. In our case we use it to pass the array
+     * that the tokens get appended into.
+     */
+    void *data;
+
+    /**
+     * This is the callback that is called when a token is lexed. It is passed
+     * the opaque data pointer, the parser, and the token that was lexed.
+     */
+    void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
+} pm_lex_callback_t;
+
+/** The type of shareable constant value that can be set. */
+typedef uint8_t pm_shareable_constant_value_t;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
+
+/**
+ * This tracks an individual local variable in a certain lexical context, as
+ * well as the number of times it is read.
+ */
+typedef struct {
+    /** The name of the local variable. */
+    pm_constant_id_t name;
+
+    /** The location of the local variable in the source. */
+    pm_location_t location;
+
+    /** The index of the local variable in the local table. */
+    uint32_t index;
+
+    /** The number of times the local variable is read. */
+    uint32_t reads;
+
+    /** The hash of the local variable. */
+    uint32_t hash;
+} pm_local_t;
+
+/**
+ * This is a set of local variables in a certain lexical context (method, class,
+ * module, etc.). We need to track how many times these variables are read in
+ * order to warn if they only get written.
+ */
+typedef struct pm_locals {
+    /** The number of local variables in the set. */
+    uint32_t size;
+
+    /** The capacity of the local variables set. */
+    uint32_t capacity;
+
+    /** The nullable allocated memory for the local variables in the set. */
+    pm_local_t *locals;
+} pm_locals_t;
+
+/** The flags about scope parameters that can be set. */
+typedef uint8_t pm_scope_parameters_t;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
+
+/**
+ * This struct represents a node in a linked list of scopes. Some scopes can see
+ * into their parent scopes, while others cannot.
+ */
+typedef struct pm_scope {
+    /** A pointer to the previous scope in the linked list. */
+    struct pm_scope *previous;
+
+    /** The IDs of the locals in the given scope. */
+    pm_locals_t locals;
+
+    /**
+     * This is a list of the implicit parameters contained within the block.
+     * These will be processed after the block is parsed to determine the kind
+     * of parameters node that should be used and to check if any errors need to
+     * be added.
+     */
+    pm_node_list_t implicit_parameters;
+
+    /**
+     * This is a bitfield that indicates the parameters that are being used in
+     * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
+     * There are three different kinds of parameters that can be used in a
+     * scope:
+     *
+     * - Ordinary parameters (e.g., def foo(bar); end)
+     * - Numbered parameters (e.g., def foo; _1; end)
+     * - The it parameter (e.g., def foo; it; end)
+     *
+     * If ordinary parameters are being used, then certain parameters can be
+     * forwarded to another method/structure. Those are indicated by four
+     * additional bits in this field. For example, some combinations of:
+     *
+     * - def foo(*); end
+     * - def foo(**); end
+     * - def foo(&); end
+     * - def foo(...); end
+     */
+    pm_scope_parameters_t parameters;
+
+    /**
+     * The current state of constant shareability for this scope. This is
+     * changed by magic shareable_constant_value comments.
+     */
+    pm_shareable_constant_value_t shareable_constant;
+
+    /**
+     * A boolean indicating whether or not this scope can see into its parent.
+     * If closed is true, then the scope cannot see into its parent.
+     */
+    bool closed;
+} pm_scope_t;
+
+/**
+ * A 32-bit unsigned integer that represents a stack of boolean values.
+ */
+typedef uint32_t pm_state_stack_t;
+
+/**
+ * This struct represents the overall parser. It contains a reference to the
+ * source file, as well as pointers that indicate where in the source it's
+ * currently parsing. It also contains the most recent and current token that
+ * it's considering.
+ */
+struct pm_parser {
+    /**
+     * The next node identifier that will be assigned. This is a unique
+     * identifier used to track nodes such that the syntax tree can be dropped
+     * but the node can be found through another parse.
+     */
+    uint32_t node_id;
+
+    /** The current state of the lexer. */
+    pm_lex_state_t lex_state;
+
+    /** Tracks the current nesting of (), [], and {}. */
+    int enclosure_nesting;
+
+    /**
+     * Used to temporarily track the nesting of enclosures to determine if a {
+     * is the beginning of a lambda following the parameters of a lambda.
+     */
+    int lambda_enclosure_nesting;
+
+    /**
+     * Used to track the nesting of braces to ensure we get the correct value
+     * when we are interpolating blocks with braces.
+     */
+    int brace_nesting;
+
+    /**
+     * The stack used to determine if a do keyword belongs to the predicate of a
+     * while, until, or for loop.
+     */
+    pm_state_stack_t do_loop_stack;
+
+    /**
+     * The stack used to determine if a do keyword belongs to the beginning of a
+     * block.
+     */
+    pm_state_stack_t accepts_block_stack;
+
+    /** A stack of lex modes. */
+    struct {
+        /** The current mode of the lexer. */
+        pm_lex_mode_t *current;
+
+        /** The stack of lexer modes. */
+        pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
+
+        /** The current index into the lexer mode stack. */
+        size_t index;
+    } lex_modes;
+
+    /** The pointer to the start of the source. */
+    const uint8_t *start;
+
+    /** The pointer to the end of the source. */
+    const uint8_t *end;
+
+    /** The previous token we were considering. */
+    pm_token_t previous;
+
+    /** The current token we're considering. */
+    pm_token_t current;
+
+    /**
+     * This is a special field set on the parser when we need the parser to jump
+     * to a specific location when lexing the next token, as opposed to just
+     * using the end of the previous token. Normally this is NULL.
+     */
+    const uint8_t *next_start;
+
+    /**
+     * This field indicates the end of a heredoc whose identifier was found on
+     * the current line. If another heredoc is found on the same line, then this
+     * will be moved forward to the end of that heredoc. If no heredocs are
+     * found on a line then this is NULL.
+     */
+    const uint8_t *heredoc_end;
+
+    /** The list of comments that have been found while parsing. */
+    pm_list_t comment_list;
+
+    /** The list of magic comments that have been found while parsing. */
+    pm_list_t magic_comment_list;
+
+    /**
+     * An optional location that represents the location of the __END__ marker
+     * and the rest of the content of the file. This content is loaded into the
+     * DATA constant when the file being parsed is the main file being executed.
+     */
+    pm_location_t data_loc;
+
+    /** The list of warnings that have been found while parsing. */
+    pm_list_t warning_list;
+
+    /** The list of errors that have been found while parsing. */
+    pm_list_t error_list;
+
+    /** The current local scope. */
+    pm_scope_t *current_scope;
+
+    /** The current parsing context. */
+    pm_context_node_t *current_context;
+
+    /**
+     * The hash keys for the hash that is currently being parsed. This is not
+     * usually necessary because they can be passed down the various call
+     * chains, but in the event that you're parsing a hash that is being directly
+     * pushed into another hash with **, we need to share the hash keys so that
+     * we can warn for the nested hash as well.
+     */
+    pm_static_literals_t *current_hash_keys;
+
+    /**
+     * The encoding functions for the current file are attached to the parser as
+     * it's parsing so that they can change with a magic comment.
+     */
+    const pm_encoding_t *encoding;
+
+    /**
+     * When the encoding that is being used to parse the source is changed by
+     * prism, we provide the ability here to call out to a user-defined
+     * function.
+     */
+    pm_encoding_changed_callback_t encoding_changed_callback;
+
+    /**
+     * This pointer indicates where a comment must start if it is to be
+     * considered an encoding comment.
+     */
+    const uint8_t *encoding_comment_start;
+
+    /**
+     * This is an optional callback that can be attached to the parser that will
+     * be called whenever a new token is lexed by the parser.
+     */
+    pm_lex_callback_t *lex_callback;
+
+    /**
+     * This is the path of the file being parsed. We use the filepath when
+     * constructing SourceFileNodes.
+     */
+    pm_string_t filepath;
+
+    /**
+     * This constant pool keeps all of the constants defined throughout the file
+     * so that we can reference them later.
+     */
+    pm_constant_pool_t constant_pool;
+
+    /** This is the list of newline offsets in the source file. */
+    pm_newline_list_t newline_list;
+
+    /**
+     * We want to add a flag to integer nodes that indicates their base. We only
+     * want to parse these once, but we don't have space on the token itself to
+     * communicate this information. So we store it here and pass it through
+     * when we find tokens that we need it for.
+     */
+    pm_node_flags_t integer_base;
+
+    /**
+     * This string is used to pass information from the lexer to the parser. It
+     * is particularly necessary because of escape sequences.
+     */
+    pm_string_t current_string;
+
+    /**
+     * The line number at the start of the parse. This will be used to offset
+     * the line numbers of all of the locations.
+     */
+    int32_t start_line;
+
+    /**
+     * When a string-like expression is being lexed, any byte or escape sequence
+     * that resolves to a value whose top bit is set (i.e., >= 0x80) will
+     * explicitly set the encoding to the same encoding as the source.
+     * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
+     * resolves to a value whose top bit is set, then the encoding will be
+     * explicitly set to UTF-8.
+     *
+     * The _next_ time this happens, if the encoding that is about to become the
+     * explicitly set encoding does not match the previously set explicit
+     * encoding, a mixed encoding error will be emitted.
+     *
+     * When the expression is finished being lexed, the explicit encoding
+     * controls the encoding of the expression. For the most part this means
+     * that the expression will either be encoded in the source encoding or
+     * UTF-8. This holds for all encodings except US-ASCII. If the source is
+     * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
+     * expression will be encoded as ASCII-8BIT.
+     *
+     * Note that if the expression is a list, different elements within the same
+     * list can have different encodings, so this will get reset between each
+     * element. Furthermore all of this only applies to lists that support
+     * interpolation, because otherwise escapes that could change the encoding
+     * are ignored.
+     *
+     * At first glance, it may make more sense for this to live on the lexer
+     * mode, but we need it here to communicate back to the parser for character
+     * literals that do not push a new lexer mode.
+     */
+    const pm_encoding_t *explicit_encoding;
+
+    /**
+     * When parsing block exits (e.g., break, next, redo), we need to validate
+     * that they are in correct contexts. For the most part we can do this by
+     * looking at our parent contexts. However, modifier while and until
+     * expressions can change that context to make block exits valid. In these
+     * cases, we need to keep track of the block exits and then validate them
+     * after the expression has been parsed.
+     *
+     * We use a pointer here because we don't want to keep a whole list attached
+     * since this will only be used in the context of begin/end expressions.
+     */
+    pm_node_list_t *current_block_exits;
+
+    /** The version of prism that we should use to parse. */
+    pm_options_version_t version;
+
+    /** The command line flags given from the options. */
+    uint8_t command_line;
+
+    /**
+     * Whether or not we have found a frozen_string_literal magic comment with
+     * a true or false value.
+     * May be:
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+     *  - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+     */
+    int8_t frozen_string_literal;
+
+    /**
+     * Whether or not we are parsing an eval string. This impacts whether or not
+     * we should evaluate if block exits/yields are valid.
+     */
+    bool parsing_eval;
+
+    /**
+     * Whether or not we are parsing a "partial" script, which is a script that
+     * will be evaluated in the context of another script, so we should not
+     * check jumps (next/break/etc.) for validity.
+     */
+    bool partial_script;
+
+    /** Whether or not we're at the beginning of a command. */
+    bool command_start;
+
+    /** Whether or not we're currently recovering from a syntax error. */
+    bool recovering;
+
+    /**
+     * This is very specialized behavior for when you want to parse in a context
+     * that does not respect encoding comments. Its main use case is translating
+     * into the whitequark/parser AST which re-encodes source files in UTF-8
+     * before they are parsed and ignores encoding comments.
+     */
+    bool encoding_locked;
+
+    /**
+     * Whether or not the encoding has been changed by a magic comment. We use
+     * this to provide a fast path for the lexer instead of going through the
+     * function pointer.
+     */
+    bool encoding_changed;
+
+    /**
+     * This flag indicates that we are currently parsing a pattern matching
+     * expression and impacts the calculation of newlines.
+     */
+    bool pattern_matching_newlines;
+
+    /** This flag indicates that we are currently parsing a keyword argument. */
+    bool in_keyword_arg;
+
+    /**
+     * Whether or not the parser has seen a token that has semantic meaning
+     * (i.e., a token that is not a comment or whitespace).
+     */
+    bool semantic_token_seen;
+
+    /**
+     * True if the current regular expression being lexed contains only ASCII
+     * characters.
+     */
+    bool current_regular_expression_ascii_only;
+
+    /**
+     * By default, Ruby always warns about mismatched indentation. This can be
+     * toggled with a magic comment.
+     */
+    bool warn_mismatched_indentation;
+};
+
+#endif
diff --git a/prism/prettyprint.h b/prism/prettyprint.h
new file mode 100644
index 0000000000..5a52b2b6b8
--- /dev/null
+++ b/prism/prettyprint.h
@@ -0,0 +1,34 @@
+/**
+ * @file prettyprint.h
+ *
+ * An AST node pretty-printer.
+ */
+#ifndef PRISM_PRETTYPRINT_H
+#define PRISM_PRETTYPRINT_H
+
+#include "prism/defines.h"
+
+#ifdef PRISM_EXCLUDE_PRETTYPRINT
+// With PRISM_EXCLUDE_PRETTYPRINT defined, only this placeholder is declared.
+void pm_prettyprint(void);
+
+#else
+
+#include <stdio.h>
+
+#include "prism/ast.h"
+#include "prism/parser.h"
+#include "prism/util/pm_buffer.h"
+
+/**
+ * Pretty-prints the AST represented by the given node to the given buffer.
+ *
+ * @param output_buffer The buffer to write the pretty-printed AST to.
+ * @param parser The parser that parsed the AST.
+ * @param node The root node of the AST to pretty-print.
+ */
+PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
+
+#endif
+
+#endif
diff --git a/prism/prism.c b/prism/prism.c
new file mode 100644
index 0000000000..b158e505b2
--- /dev/null
+++ b/prism/prism.c
@@ -0,0 +1,22679 @@
+#include "prism.h"
+
+/**
+ * Returns the prism version as a string (the value of PRISM_VERSION).
+ */
+const char *
+pm_version(void) {
+    return PRISM_VERSION;
+}
+
+/**
+ * In heredocs, tabs automatically complete up to the next 8 spaces. This is
+ * defined in CRuby as TAB_WIDTH.
+ */
+#define PM_TAB_WHITESPACE_SIZE 8
+
+// Macros for min/max. Each argument may be evaluated twice: avoid side effects.
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+/******************************************************************************/
+/* Helpful AST-related macros                                                */
+/******************************************************************************/
+// Short aliases for the node flags and node upcast macros.
+#define FL PM_NODE_FLAGS
+#define UP PM_NODE_UPCAST
+// Start and end of a token; the token is passed as a pointer.
+#define PM_TOKEN_START(token_) ((token_)->start)
+#define PM_TOKEN_END(token_) ((token_)->end)
+// Start and end of a node's location; the node is upcast with UP first.
+#define PM_NODE_START(node_) (UP(node_)->location.start)
+#define PM_NODE_END(node_) (UP(node_)->location.end)
+// pm_location_t values: empty at the source start, from a token, from a node, or zeroed if the token was not provided.
+#define PM_LOCATION_NULL_VALUE(parser_) ((pm_location_t) { .start = (parser_)->start, .end = (parser_)->start })
+#define PM_LOCATION_TOKEN_VALUE(token_) ((pm_location_t) { .start = PM_TOKEN_START(token_), .end = PM_TOKEN_END(token_) })
+#define PM_LOCATION_NODE_VALUE(node_) ((pm_location_t) { .start = PM_NODE_START(node_), .end = PM_NODE_END(node_) })
+#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? ((pm_location_t) { 0 }) : PM_LOCATION_TOKEN_VALUE(token))
+
+/******************************************************************************/
+/* Lex mode manipulations                                                     */
+/******************************************************************************/
+
+/**
+ * Yields the byte that bumps the nesting count for a literal opened by the
+ * given delimiter, or '\0' when that delimiter has no incrementor.
+ */
+static inline uint8_t
+lex_mode_incrementor(const uint8_t start) {
+    // Bracket-style openers serve as their own incrementor.
+    const bool nests = (
+        start == '(' ||
+        start == '[' ||
+        start == '{' ||
+        start == '<'
+    );
+
+    return nests ? start : '\0';
+}
+
+/**
+ * Yields the byte that closes a literal opened by the given delimiter: the
+ * mirrored bracket for bracket-style openers, otherwise the delimiter itself.
+ */
+static inline uint8_t
+lex_mode_terminator(const uint8_t start) {
+    // Bracket-style openers, paired index-for-index with their closers.
+    static const uint8_t openers[] = { '(', '[', '{', '<' };
+    static const uint8_t closers[] = { ')', ']', '}', '>' };
+
+    for (size_t index = 0; index < sizeof(openers); index++) {
+        if (openers[index] == start) {
+            return closers[index];
+        }
+    }
+
+    // Any other delimiter terminates itself.
+    return start;
+}
+
+/**
+ * Push a new lex mode onto the stack. While a pre-allocated slot remains it is
+ * used; otherwise the mode is heap-allocated. Returns false, leaving the lex
+ * mode stack exactly as it was, if that allocation fails.
+ */
+static bool
+lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
+    const size_t index = parser->lex_modes.index + 1;
+    pm_lex_mode_t *slot;
+
+    if (index < PM_LEX_STACK_SIZE) {
+        slot = &parser->lex_modes.stack[index];
+    } else if ((slot = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t))) == NULL) {
+        return false;
+    }
+
+    lex_mode.prev = parser->lex_modes.current;
+    *slot = lex_mode;
+    parser->lex_modes.current = slot;
+    parser->lex_modes.index = index;
+    return true;
+}
+
+/**
+ * Push a lex mode for a list literal opened by the given delimiter.
+ */
+static inline bool
+lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
+    const uint8_t opener = lex_mode_incrementor(delimiter);
+    const uint8_t closer = lex_mode_terminator(delimiter);
+
+    pm_lex_mode_t lex_mode = {
+        .mode = PM_LEX_LIST,
+        .as.list = {
+            .nesting = 0,
+            .interpolation = interpolation,
+            .incrementor = opener,
+            .terminator = closer
+        }
+    };
+
+    // The content of the list gets split at each of these bytes. The first
+    // seven are always present; the rest start as NUL and are filled in below.
+    uint8_t *cursor = lex_mode.as.list.breakpoints;
+    memcpy(cursor, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
+    cursor += 7;
+
+    // The terminator becomes a breakpoint as well, unless it is a NUL byte, in
+    // which case there is nothing to add.
+    if (closer != '\0') {
+        *cursor++ = closer;
+    }
+
+    // The # character is only a breakpoint when interpolation is allowed;
+    // without it, only escapes and the terminator are of interest.
+    if (interpolation) {
+        *cursor++ = '#';
+    }
+
+    // An incrementor, when there is one, is a breakpoint too.
+    if (opener != '\0') {
+        *cursor++ = opener;
+    }
+
+    parser->explicit_encoding = NULL;
+    return lex_mode_push(parser, lex_mode);
+}
+
+/**
+ * Push on a new list lex mode that is only used for compatibility: it has no
+ * interpolation and a '\0' delimiter. This is called when we're at the end of
+ * the file so that the parser can perform its normal error tolerance.
+ */
+static inline bool
+lex_mode_push_list_eof(pm_parser_t *parser) {
+    return lex_mode_push_list(parser, false, '\0');
+}
+
+/**
+ * Push a lex mode for a regular expression with the given delimiters.
+ */
+static inline bool
+lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
+    pm_lex_mode_t lex_mode = {
+        .mode = PM_LEX_REGEXP,
+        .as.regexp = {
+            .nesting = 0,
+            .incrementor = incrementor,
+            .terminator = terminator
+        }
+    };
+
+    // The content of the regular expression gets split at each of these
+    // bytes. The first four are always present; the rest start as NUL and are
+    // filled in below.
+    uint8_t *cursor = lex_mode.as.regexp.breakpoints;
+    memcpy(cursor, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+    cursor += 4;
+
+    // A terminator other than NUL is added first.
+    if (terminator != '\0') {
+        *cursor++ = terminator;
+    }
+
+    // An incrementor other than NUL follows it.
+    if (incrementor != '\0') {
+        *cursor++ = incrementor;
+    }
+
+    parser->explicit_encoding = NULL;
+    return lex_mode_push(parser, lex_mode);
+}
+
+/**
+ * Push a lex mode for a string with the given options and delimiters.
+ */
+static inline bool
+lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
+    pm_lex_mode_t lex_mode = {
+        .mode = PM_LEX_STRING,
+        .as.string = {
+            .nesting = 0,
+            .interpolation = interpolation,
+            .label_allowed = label_allowed,
+            .incrementor = incrementor,
+            .terminator = terminator
+        }
+    };
+
+    // The content of the string gets split at each of these bytes. The first
+    // three are always present; the rest start as NUL and are filled in below.
+    uint8_t *cursor = lex_mode.as.string.breakpoints;
+    memcpy(cursor, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+    cursor += 3;
+
+    // The terminator becomes a breakpoint as well, unless it is a NUL byte, in
+    // which case there is nothing to add.
+    if (terminator != '\0') {
+        *cursor++ = terminator;
+    }
+
+    // The # character is only a breakpoint when interpolation is allowed;
+    // without it, only escapes and the terminator are of interest.
+    if (interpolation) {
+        *cursor++ = '#';
+    }
+
+    // An incrementor, when there is one, is added as the final breakpoint of
+    // the string.
+    if (incrementor != '\0') {
+        *cursor++ = incrementor;
+    }
+
+    parser->explicit_encoding = NULL;
+    return lex_mode_push(parser, lex_mode);
+}
+
+/**
+ * Push on a new string lex mode that is only used for compatibility: it has no
+ * interpolation, no labels, and '\0' for both delimiters. This is called at the
+ * end of the file so that the parser can perform its normal error tolerance.
+ */
+static inline bool
+lex_mode_push_string_eof(pm_parser_t *parser) {
+    return lex_mode_push_string(parser, false, false, '\0', '\0');
+}
+
+/**
+ * Pop the current lex mode off the stack. At index 0 nothing is removed and the
+ * mode is reset to the default instead. A mode in the pre-allocated space is
+ * stepped back over; a mode beyond it is freed in favor of its prev pointer.
+ */
+static void
+lex_mode_pop(pm_parser_t *parser) {
+    if (parser->lex_modes.index == 0) {
+        parser->lex_modes.current->mode = PM_LEX_DEFAULT;
+        return;
+    }
+
+    pm_lex_mode_t *popped = parser->lex_modes.current;
+    const bool on_heap = parser->lex_modes.index >= PM_LEX_STACK_SIZE;
+
+    parser->lex_modes.index--;
+    parser->lex_modes.current = on_heap ? popped->prev : &parser->lex_modes.stack[parser->lex_modes.index];
+    if (on_heap) xfree(popped);
+}
+
+/**
+ * The equivalent of IS_lex_state in CRuby: true if any bit of state is set.
+ */
+static inline bool
+lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
+    return parser->lex_state & state;
+}
+// The possible results of lex_state_ignored_p.
+typedef enum {
+    PM_IGNORED_NEWLINE_NONE = 0,
+    PM_IGNORED_NEWLINE_ALL,
+    PM_IGNORED_NEWLINE_PATTERN
+} pm_ignored_newline_type_t;
+
+static inline pm_ignored_newline_type_t
+lex_state_ignored_p(pm_parser_t *parser) {
+    // BEG, CLASS, FNAME, or DOT without LABELED maps to PM_IGNORED_NEWLINE_ALL.
+    if (lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED)) {
+        return PM_IGNORED_NEWLINE_ALL;
+    }
+
+    // Otherwise the state, with its LABEL bit masked off, must be exactly
+    // ARG | LABELED to map to PM_IGNORED_NEWLINE_PATTERN.
+    const bool pattern = (parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
+    return pattern ? PM_IGNORED_NEWLINE_PATTERN : PM_IGNORED_NEWLINE_NONE;
+}
+// True if any PM_LEX_STATE_BEG_ANY bit is set, or if ARG and LABELED are both set.
+static inline bool
+lex_state_beg_p(pm_parser_t *parser) {
+    return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
+}
+// True if any PM_LEX_STATE_ARG_ANY bit is set in the lexer state.
+static inline bool
+lex_state_arg_p(pm_parser_t *parser) {
+    return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
+}
+
+/**
+ * True if the lexer is in an ARG state, a space was seen, and the byte after
+ * the current token exists and is not whitespace.
+ */
+static inline bool
+lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
+    return (parser->current.end < parser->end) && lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
+}
+// True if any PM_LEX_STATE_END_ANY bit is set in the lexer state.
+static inline bool
+lex_state_end_p(pm_parser_t *parser) {
+    return lex_state_p(parser, PM_LEX_STATE_END_ANY);
+}
+
+/**
+ * The equivalent of IS_AFTER_OPERATOR in CRuby: true if FNAME or DOT is set.
+ */
+static inline bool
+lex_state_operator_p(pm_parser_t *parser) {
+    return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
+}
+
+/**
+ * Set the state of the lexer. This is a function so that a breakpoint can be
+ * put in it; with PM_DEBUG_LOGGING on, a same-named macro adds logging around it.
+ */
+static inline void
+lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
+    parser->lex_state = state;
+}
+
+#ifndef PM_DEBUG_LOGGING
+/**
+ * Debugging logging will print additional information to stderr whenever the
+ * lexer state changes. It is off unless PM_DEBUG_LOGGING is already defined.
+ */
+#define PM_DEBUG_LOGGING 0
+#endif
+
+#if PM_DEBUG_LOGGING
+PRISM_ATTRIBUTE_UNUSED static void
+debug_state(pm_parser_t *parser) {
+    // Print the name of every lex state flag that is set, joined by "|".
+    static const struct { pm_lex_state_t flag; const char *name; } flags[] = {
+        { PM_LEX_STATE_BEG, "PM_LEX_STATE_BEG" },
+        { PM_LEX_STATE_END, "PM_LEX_STATE_END" },
+        { PM_LEX_STATE_ENDARG, "PM_LEX_STATE_ENDARG" },
+        { PM_LEX_STATE_ENDFN, "PM_LEX_STATE_ENDFN" },
+        { PM_LEX_STATE_ARG, "PM_LEX_STATE_ARG" },
+        { PM_LEX_STATE_CMDARG, "PM_LEX_STATE_CMDARG" },
+        { PM_LEX_STATE_MID, "PM_LEX_STATE_MID" },
+        { PM_LEX_STATE_FNAME, "PM_LEX_STATE_FNAME" },
+        { PM_LEX_STATE_DOT, "PM_LEX_STATE_DOT" },
+        { PM_LEX_STATE_CLASS, "PM_LEX_STATE_CLASS" },
+        { PM_LEX_STATE_LABEL, "PM_LEX_STATE_LABEL" },
+        { PM_LEX_STATE_LABELED, "PM_LEX_STATE_LABELED" },
+        { PM_LEX_STATE_FITEM, "PM_LEX_STATE_FITEM" }
+    };
+
+    fprintf(stderr, "STATE: ");
+    if (parser->lex_state == PM_LEX_STATE_NONE) {
+        fprintf(stderr, "NONE\n");
+        return;
+    }
+
+    const char *separator = "";
+    for (size_t index = 0; index < sizeof(flags) / sizeof(flags[0]); index++) {
+        if (parser->lex_state & flags[index].flag) {
+            fprintf(stderr, "%s%s", separator, flags[index].name);
+            separator = "|";
+        }
+    }
+
+    fprintf(stderr, "\n");
+}
+// Sets the lexer state, logging the caller and the state before and after to stderr.
+static void
+debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
+    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
+    debug_state(parser);
+    lex_state_set(parser, state);
+    fprintf(stderr, "Now: ");
+    debug_state(parser);
+    fprintf(stderr, "\n");
+}
+// From here on, lex_state_set calls go through the logging wrapper instead.
+#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
+#endif
+
+/******************************************************************************/
+/* Command-line macro helpers                                                 */
+/******************************************************************************/
+
+/** Nonzero if the parser's command_line flags include the given option. */
+#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))
+
+/** Nonzero if the -a command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)
+
+/** Nonzero if the -e command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)
+
+/** Nonzero if the -l command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)
+
+/** Nonzero if the -n command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)
+
+/** Nonzero if the -p command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)
+
+/** Nonzero if the -x command line option was given. */
+#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
+
+/******************************************************************************/
+/* Diagnostic-related functions                                               */
+/******************************************************************************/
+
+/**
+ * Append an error for the given source range to the parser's list of errors.
+ */
+static inline void
+pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
+}
+
+/**
+ * Append an error to the list of errors on the parser using a format string.
+ */
+#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
+    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
+
+/**
+ * Append an error to the list of errors on the parser using the location of the
+ * current token (parser->current).
+ */
+static inline void
+pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
+}
+
+/**
+ * Append an error to the list of errors on the parser using the given location
+ * (passed as a pointer) and a format string.
+ */
+#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
+    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
+
+/**
+ * Append an error to the list of errors on the parser, spanning the location
+ * of the given node.
+ */
+static inline void
+pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, node->location.start, node->location.end, diag_id);
+}
+
+/**
+ * Append an error to the list of errors on the parser using the location of the
+ * given node (passed as a pointer) and a format string.
+ */
+#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
+    PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+
+/**
+ * Append a formatted error at the given node's location, passing the node's
+ * source content to the format as an (int) length and a (const char *) pointer.
+ */
+#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
+    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
+
+/**
+ * Append an error to the list of errors on the parser using the location of the
+ * previous token (parser->previous).
+ */
+static inline void
+pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
+}
+
+/**
+ * Append an error to the list of errors on the parser, spanning the given
+ * token (passed as a pointer).
+ */
+static inline void
+pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, token->start, token->end, diag_id);
+}
+
+/**
+ * Append an error to the list of errors on the parser using the location of the
+ * given token (passed as a value, not a pointer) and a format string.
+ */
+#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
+    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+
+/**
+ * Append a formatted error at the given token's location, passing the token's
+ * source content to the format as an (int) length and a (const char *) pointer.
+ */
+#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
+    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+
+/**
+ * Append a warning for the given source range to the parser's list of warnings.
+ */
+static inline void
+pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
+}
+
+/**
+ * Append a warning to the list of warnings on the parser, spanning the given
+ * token (passed as a pointer).
+ */
+static inline void
+pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+    pm_parser_warn(parser, token->start, token->end, diag_id);
+}
+
+/**
+ * Append a warning to the list of warnings on the parser, spanning the location
+ * of the given node.
+ */
+static inline void
+pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
+    pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
+}
+
+/**
+ * Append a warning to the list of warnings on the parser using a format string.
+ */
+#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
+    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
+
+/**
+ * Append a warning to the list of warnings on the parser using the location of
+ * the given token (passed as a value, not a pointer) and a format string.
+ */
+#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
+    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+
+/**
+ * Append a formatted warning at the given token's location, passing the token's
+ * source content to the format as an (int) length and a (const char *) pointer.
+ */
+#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
+    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+
+/**
+ * Append a warning to the list of warnings on the parser using the location of
+ * the given node (passed as a pointer) and a format string.
+ */
+#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
+    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+
+/**
+ * Add an error for an expected heredoc terminator. The location is given as a
+ * raw identifier start and length because it comes off of a lex mode instead
+ * of a node or a token.
+ */
+static void
+pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
+    // The error spans exactly the identifier.
+    const uint8_t *ident_end = ident_start + ident_length;
+
+    // The identifier is also handed to the format as a length and a pointer.
+    const int content_length = (int) ident_length;
+    const char *content = (const char *) ident_start;
+
+    PM_PARSER_ERR_FORMAT(parser, ident_start, ident_end, PM_ERR_HEREDOC_TERM, content_length, content);
+}
+
+/******************************************************************************/
+/* Scope-related functions                                                    */
+/******************************************************************************/
+
+/**
+ * Allocate and initialize a new scope, making it the parser's current scope.
+ */
+static bool
+pm_parser_scope_push(pm_parser_t *parser, bool closed) {
+    pm_scope_t *parent = parser->current_scope;
+    pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
+    if (scope == NULL) return false;
+
+    *scope = (pm_scope_t) {
+        .previous = parent,
+        .locals = { 0 },
+        .parameters = PM_SCOPE_PARAMETERS_NONE,
+        .implicit_parameters = { 0 },
+        .shareable_constant = (parent != NULL) ? parent->shareable_constant : PM_SCOPE_SHAREABLE_CONSTANT_NONE,
+        .closed = closed
+    };
+    parser->current_scope = scope;
+    return true;
+}
+
+/**
+ * Determine if the current scope is at the top level: either it is the root
+ * scope, or every scope between it and the root (itself included) is open.
+ */
+static bool
+pm_parser_scope_toplevel_p(pm_parser_t *parser) {
+    const pm_scope_t *cursor = parser->current_scope;
+
+    // Walk outward toward the root scope. Having to cross a closed scope on
+    // the way there means that we are not at the top level.
+    while (cursor->previous != NULL) {
+        if (cursor->closed) return false;
+        cursor = cursor->previous;
+    }
+    return true;
+}
+
+/**
+ * Retrieve the scope at the given depth, where 0 is the current scope.
+ */
+static pm_scope_t *
+pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
+    pm_scope_t *found = parser->current_scope;
+
+    // Step out to the enclosing scope once per requested level.
+    for (uint32_t level = 0; level < depth; level++) {
+        assert(found != NULL);
+        found = found->previous;
+    }
+    return found;
+}
+
+typedef enum {
+    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS, // only the nearest closed scope declares the parameter
+    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT, // an open scope inside that closed scope declares it too
+    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL // the nearest closed scope (if any) does not declare it
+} pm_scope_forwarding_param_check_result_t;
+
+/**
+ * Check whether the forwarding parameter(s) selected by mask are available,
+ * by walking outward from the current scope to the nearest closed scope.
+ */
+static pm_scope_forwarding_param_check_result_t
+pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
+    // Set once an open scope on the way out declares the parameter itself.
+    bool shadowed = false;
+
+    for (pm_scope_t *scope = parser->current_scope; scope != NULL; scope = scope->previous) {
+        const bool declared = (scope->parameters & mask) != 0;
+        if (scope->closed) {
+            // The walk always ends at the first closed scope: it either
+            // declares the parameter or the lookup has failed.
+            if (!declared) break;
+            if (shadowed) return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
+            return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
+        }
+
+        if (declared) shadowed = true;
+    }
+
+    return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
+}
+
+/**
+ * Report an error on the given token unless block forwarding is available.
+ */
+static void
+pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK);
+
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
+    } else if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
+    }
+}
+
+/**
+ * Report an error on the given token unless positional forwarding is available.
+ */
+static void
+pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS);
+
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
+    } else if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
+    }
+}
+
+/**
+ * Report an error on the given token unless forwarding of all arguments is
+ * available, i.e. unless the check for PM_SCOPE_PARAMETERS_FORWARDING_ALL passes.
+ */
+static void
+pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL);
+
+    // A conflict is deliberately ignored. It shouldn't happen, because ... is
+    // not allowed in the declaration of blocks. If we do get one, we assume
+    // that we already have an error for it.
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+    }
+}
+
+/**
+ * Report an error on the given token unless keyword forwarding is available.
+ */
+static void
+pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
+    const pm_scope_forwarding_param_check_result_t result =
+        pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS);
+
+    if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
+    } else if (result == PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL) {
+        pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
+    }
+}
+
+/**
+ * Get the state of constant shareability that is stored on the current scope.
+ */
+static inline pm_shareable_constant_value_t
+pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
+    return parser->current_scope->shareable_constant; // current_scope must not be NULL here
+}
+
+/**
+ * Set the state of constant shareability on the current scope and on each
+ * enclosing scope up to and including the nearest closed one, so reads are quick.
+ */
+static void
+pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
+    for (pm_scope_t *cursor = parser->current_scope; ; cursor = cursor->previous) {
+        cursor->shareable_constant = shareable_constant;
+        // Stop after a closed scope, or once we run out of scopes.
+        if (cursor->closed || cursor->previous == NULL) break;
+    }
+}
+
+/******************************************************************************/
+/* Local variable-related functions                                           */
+/******************************************************************************/
+
+/**
+ * The capacity at which the set of locals switches from being a list to a hash.
+ */
+#define PM_LOCALS_HASH_THRESHOLD 9
+
+/** Free the storage of the set of locals, if there is any. */
+static void
+pm_locals_free(pm_locals_t *locals) {
+    // A capacity of zero is taken to mean that nothing was ever allocated.
+    if (locals->capacity > 0) xfree(locals->locals);
+}
+
+/**
+ * Hash a constant id using two rounds of xorshift-and-multiply followed by a
+ * final xorshift, which is cheap while still mixing the bits.
+ */
+static uint32_t
+pm_locals_hash(pm_constant_id_t name) {
+    // Both mixing rounds are identical, so run them from a loop.
+    for (int round = 0; round < 2; round++) name = ((name >> 16) ^ name) * 0x45d9f3b;
+    name = (name >> 16) ^ name;
+    return name;
+}
+
+/**
+ * Resize the locals list to be twice its current size (or 4 if it is empty). If
+ * the next capacity reaches the hash threshold, the entries are (re)hashed.
+ */
+static void
+pm_locals_resize(pm_locals_t *locals) {
+    uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
+    assert(next_capacity > locals->capacity); // catches the doubling wrapping around
+
+    pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
+    if (next_locals == NULL) abort(); // allocation failure is treated as fatal
+
+    if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
+        // Still a list: the entries keep their positions, so copy them over.
+        if (locals->size > 0) {
+            memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
+        }
+    } else {
+        // If we just switched from a list to a hash, then we need to fill in
+        // the hash values of all of the locals.
+        bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
+        uint32_t mask = next_capacity - 1; // relies on the capacity being a power of two
+
+        for (uint32_t index = 0; index < locals->capacity; index++) {
+            pm_local_t *local = &locals->locals[index];
+
+            if (local->name != PM_CONSTANT_ID_UNSET) {
+                if (hash_needed) local->hash = pm_locals_hash(local->name);
+
+                uint32_t hash = local->hash;
+                while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++; // skip occupied slots
+                next_locals[hash & mask] = *local;
+            }
+        }
+    }
+
+    pm_locals_free(locals); // releases the old storage only; size is left untouched
+    locals->locals = next_locals;
+    locals->capacity = next_capacity;
+}
+
+/**
+ * Add a new local to the set of locals. This will automatically resize the
+ * locals first if the size has reached 3/4 of the capacity.
+ *
+ * @param locals The set of locals to add to.
+ * @param name The name of the local.
+ * @param start The source location that represents the start of the local. This
+ *   is used for the location of the warning in case this local is not read.
+ * @param end The source location that represents the end of the local. This is
+ *   used for the location of the warning in case this local is not read.
+ * @param reads The initial number of reads for this local. Usually this is set
+ *   to 0, but for some locals (like parameters) we want to initialize it with
+ *   1 so that we never warn on unused parameters.
+ * @return True if the local was added, and false if the local already exists.
+ */
+static bool
+pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+    if (locals->size >= (locals->capacity / 4 * 3)) {
+        pm_locals_resize(locals); // also handles the initial allocation when capacity is 0
+    }
+
+    if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
+        for (uint32_t index = 0; index < locals->capacity; index++) {
+            pm_local_t *local = &locals->locals[index];
+
+            if (local->name == PM_CONSTANT_ID_UNSET) {
+                *local = (pm_local_t) {
+                    .name = name,
+                    .location = { .start = start, .end = end },
+                    .index = locals->size++,
+                    .reads = reads,
+                    .hash = 0
+                };
+                return true;
+            } else if (local->name == name) {
+                return false;
+            }
+        }
+    } else {
+        uint32_t mask = locals->capacity - 1;
+        uint32_t hash = pm_locals_hash(name);
+        uint32_t initial_hash = hash; // the full hash is stored so that a resize can re-mask it
+
+        do {
+            pm_local_t *local = &locals->locals[hash & mask];
+
+            if (local->name == PM_CONSTANT_ID_UNSET) {
+                *local = (pm_local_t) {
+                    .name = name,
+                    .location = { .start = start, .end = end },
+                    .index = locals->size++,
+                    .reads = reads,
+                    .hash = initial_hash
+                };
+                return true;
+            } else if (local->name == name) {
+                return false;
+            } else {
+                hash++;
+            }
+        } while ((hash & mask) != (initial_hash & mask)); // stop once the probe wraps to its first slot
+    }
+
+    assert(false && "unreachable");
+    return true;
+}
+
+/**
+ * Find the slot that holds the given local within the storage of the set. If
+ * the local is not found, this returns UINT32_MAX.
+ */
+static uint32_t
+pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+    // Below the threshold the locals are a plain list, so scan the entries
+    // that have been written so far.
+    if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
+        for (uint32_t slot = 0; slot < locals->size; slot++) {
+            if (locals->locals[slot].name == name) return slot;
+        }
+        return UINT32_MAX;
+    }
+
+    // Otherwise probe forward from the slot that the name hashes to until we
+    // hit an unused slot, hit the name, or come back around to the start.
+    const uint32_t mask = locals->capacity - 1;
+    uint32_t probe = pm_locals_hash(name);
+    const uint32_t first_slot = probe & mask;
+
+    do {
+        const pm_constant_id_t occupant = locals->locals[probe & mask].name;
+
+        if (occupant == PM_CONSTANT_ID_UNSET) return UINT32_MAX;
+        if (occupant == name) return probe & mask;
+        probe++;
+    } while ((probe & mask) != first_slot);
+
+    return UINT32_MAX;
+}
+
+/**
+ * Called when a variable is read in a certain lexical context. Tracks the read
+ * by incrementing the reads count of the local, which must already exist.
+ */
+static void
+pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
+    const uint32_t slot = pm_locals_find(locals, name);
+    assert(slot != UINT32_MAX);
+
+    // The counter must not already be at its maximum, since the increment
+    // below would otherwise overflow it.
+    assert(locals->locals[slot].reads < UINT32_MAX);
+    locals->locals[slot].reads++;
+}
+
+/**
+ * Called when a variable read is transformed into a variable write, because a
+ * write operator is found after the variable name. Undoes one recorded read.
+ */
+static void
+pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
+    const uint32_t slot = pm_locals_find(locals, name);
+    assert(slot != UINT32_MAX);
+
+    // There has to be at least one recorded read for us to take back, since
+    // the decrement below would otherwise underflow.
+    assert(locals->locals[slot].reads > 0);
+    locals->locals[slot].reads--;
+}
+
+/**
+ * Returns the current number of reads for a local, which must already exist.
+ */
+static uint32_t
+pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
+    const uint32_t slot = pm_locals_find(locals, name);
+    assert(slot != UINT32_MAX);
+    const pm_local_t *found = &locals->locals[slot];
+    return found->reads;
+}
+
+/**
+ * Write out the locals into the given list of constant ids, each one at the
+ * index that it was assigned when it was written. This is used to set the list
+ * of locals on the nodes in the tree once no more locals will be added.
+ *
+ * This function is also responsible for warning when a local variable has been
+ * written but not read in certain contexts.
+ */
+static void
+pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
+    pm_constant_id_list_init_capacity(list, locals->size);
+
+    // While the locals are still stored as a list, only the first size slots
+    // are in use. Once they are a hash, any slot may be in use.
+    const bool hashed = locals->capacity >= PM_LOCALS_HASH_THRESHOLD;
+    const uint32_t limit = hashed ? locals->capacity : locals->size;
+
+    // Unused variables are always reported below the top level. At the top
+    // level they are only reported outside of eval and -e.
+    bool warn_unused = !toplevel;
+    if (toplevel) warn_unused = !parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser);
+
+    for (uint32_t slot = 0; slot < limit; slot++) {
+        pm_local_t *local = &locals->locals[slot];
+        if (local->name == PM_CONSTANT_ID_UNSET) continue;
+
+        pm_constant_id_list_insert(list, (size_t) local->index, local->name);
+        if (!warn_unused || local->reads != 0) continue;
+
+        // Skip the warning when both the parser's start line and the line
+        // computed for the local are negative.
+        if (parser->start_line < 0 && pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) < 0) continue;
+
+        // Names that are empty or that start with an underscore are exempt
+        // from the warning.
+        pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
+        if (constant->length < 1 || *constant->start == '_') continue;
+
+        PM_PARSER_WARN_FORMAT(
+            parser, local->location.start, local->location.end, PM_WARN_UNUSED_LOCAL_VARIABLE,
+            (int) constant->length, (const char *) constant->start
+        );
+    }
+}
+
+/******************************************************************************/
+/* Node-related functions                                                     */
+/******************************************************************************/
+
+/**
+ * Retrieve the constant pool id for the bytes from start up to (not including) end.
+ */
+static inline pm_constant_id_t
+pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start)); // length in bytes
+}
+
+/**
+ * Retrieve the constant pool id for the given string of the given length.
+ */
+static inline pm_constant_id_t
+pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
+    return pm_constant_pool_insert_owned(&parser->constant_pool, start, length); // NOTE(review): presumably the pool takes ownership of start - confirm
+}
+
+/**
+ * Retrieve the constant pool id for a static literal C string of the given length.
+ */
+static inline pm_constant_id_t
+pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
+    return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length); // the pool's API works in bytes
+}
+
+/**
+ * Retrieve the constant pool id for the source bytes covered by the given token.
+ */
+static inline pm_constant_id_t
+pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
+    return pm_parser_constant_id_location(parser, token->start, token->end); // token must not be NULL
+}
+
+/**
+ * Like pm_parser_constant_id_token, but returns 0 for a not-provided token.
+ */
+static inline pm_constant_id_t
+pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
+    if (token->type == PM_TOKEN_NOT_PROVIDED) return 0;
+    return pm_parser_constant_id_token(parser, token);
+}
+
+/**
+ * Check whether or not the given node is a value expression. If it is, this
+ * returns NULL. If it is not, this returns a pointer to the node that should
+ * be reported as the "void expression".
+ */
+static pm_node_t *
+pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
+    pm_node_t *void_node = NULL; // earliest void node found on a branch that cannot settle the answer alone
+
+    while (node != NULL) {
+        switch (PM_NODE_TYPE(node)) {
+            case PM_RETURN_NODE:
+            case PM_BREAK_NODE:
+            case PM_NEXT_NODE:
+            case PM_REDO_NODE:
+            case PM_RETRY_NODE:
+            case PM_MATCH_REQUIRED_NODE:
+                return void_node != NULL ? void_node : node; // prefer the void node that was recorded earlier
+            case PM_MATCH_PREDICATE_NODE:
+                return NULL;
+            case PM_BEGIN_NODE: {
+                pm_begin_node_t *cast = (pm_begin_node_t *) node;
+
+                if (cast->ensure_clause != NULL) {
+                    // With an ensure clause, a void rescue clause or a void body is reported right away.
+                    if (cast->rescue_clause != NULL) {
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
+                        if (vn != NULL) return vn;
+                    }
+
+                    if (cast->statements != NULL) {
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+                        if (vn != NULL) return vn;
+                    }
+
+                    node = UP(cast->ensure_clause); // otherwise keep checking inside the ensure clause
+                } else if (cast->rescue_clause != NULL) {
+                    if (cast->statements == NULL) return NULL;
+
+                    pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+                    if (vn == NULL) return NULL; // the body has a value, so the whole begin does
+                    if (void_node == NULL) void_node = vn;
+
+                    for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
+                        pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
+                        if (vn == NULL) {
+                            void_node = NULL; // one rescue clause with a value discards what was recorded
+                            break;
+                        }
+                        if (void_node == NULL) {
+                            void_node = vn;
+                        }
+                    }
+
+                    if (cast->else_clause != NULL) {
+                        node = UP(cast->else_clause);
+                    } else {
+                        return void_node;
+                    }
+                } else {
+                    node = UP(cast->statements); // may be NULL, which ends the loop
+                }
+
+                break;
+            }
+            case PM_ENSURE_NODE: {
+                pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
+                node = UP(cast->statements);
+                break;
+            }
+            case PM_PARENTHESES_NODE: {
+                pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
+                node = UP(cast->body);
+                break;
+            }
+            case PM_STATEMENTS_NODE: {
+                pm_statements_node_t *cast = (pm_statements_node_t *) node;
+                node = cast->body.nodes[cast->body.size - 1]; // only the last statement matters; assumes size > 0 - TODO confirm
+                break;
+            }
+            case PM_IF_NODE: {
+                pm_if_node_t *cast = (pm_if_node_t *) node;
+                if (cast->statements == NULL || cast->subsequent == NULL) {
+                    return NULL; // a missing branch is treated as having a value
+                }
+                pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+                if (vn == NULL) {
+                    return NULL;
+                }
+                if (void_node == NULL) {
+                    void_node = vn;
+                }
+                node = cast->subsequent; // the other branch has to be void as well
+                break;
+            }
+            case PM_UNLESS_NODE: {
+                pm_unless_node_t *cast = (pm_unless_node_t *) node;
+                if (cast->statements == NULL || cast->else_clause == NULL) {
+                    return NULL; // a missing branch is treated as having a value
+                }
+                pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+                if (vn == NULL) {
+                    return NULL;
+                }
+                if (void_node == NULL) {
+                    void_node = vn;
+                }
+                node = UP(cast->else_clause); // the other branch has to be void as well
+                break;
+            }
+            case PM_ELSE_NODE: {
+                pm_else_node_t *cast = (pm_else_node_t *) node;
+                node = UP(cast->statements);
+                break;
+            }
+            case PM_AND_NODE: {
+                pm_and_node_t *cast = (pm_and_node_t *) node;
+                node = cast->left; // only the left operand is examined
+                break;
+            }
+            case PM_OR_NODE: {
+                pm_or_node_t *cast = (pm_or_node_t *) node;
+                node = cast->left; // only the left operand is examined
+                break;
+            }
+            case PM_LOCAL_VARIABLE_WRITE_NODE: {
+                pm_local_variable_write_node_t *cast = (pm_local_variable_write_node_t *) node;
+
+                pm_scope_t *scope = parser->current_scope;
+                for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous; // scope the local lives in
+
+                pm_locals_read(&scope->locals, cast->name); // side effect: the written local is counted as read
+                return NULL;
+            }
+            default:
+                return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+static inline void
+pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
+    // Add an error on the offending node if the given node has no value.
+    pm_node_t *offender = pm_check_value_expression(parser, node);
+    if (offender == NULL) return;
+    pm_parser_err_node(parser, offender, PM_ERR_VOID_EXPRESSION);
+}
+
+/**
+ * Warn if the given node is one of the kinds treated as a "void" statement.
+ */
+static void
+pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
+    const char *type = NULL; // description of the node used in the warning; NULL means no warning
+    int length = 0; // number of bytes of type to use, since type is not always NUL-terminated
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_BACK_REFERENCE_READ_NODE:
+        case PM_CLASS_VARIABLE_READ_NODE:
+        case PM_GLOBAL_VARIABLE_READ_NODE:
+        case PM_INSTANCE_VARIABLE_READ_NODE:
+        case PM_LOCAL_VARIABLE_READ_NODE:
+        case PM_NUMBERED_REFERENCE_READ_NODE:
+            type = "a variable";
+            length = 10;
+            break;
+        case PM_CALL_NODE: {
+            const pm_call_node_t *cast = (const pm_call_node_t *) node;
+            if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break; // needs a message and no call operator
+
+            const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
+            switch (message->length) {
+                case 1:
+                    switch (message->start[0]) {
+                        case '+':
+                        case '-':
+                        case '*':
+                        case '/':
+                        case '%':
+                        case '|':
+                        case '^':
+                        case '&':
+                        case '>':
+                        case '<':
+                            type = (const char *) message->start; // the method name itself is the description
+                            length = 1;
+                            break;
+                    }
+                    break;
+                case 2:
+                    switch (message->start[1]) {
+                        case '=':
+                            if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') { // <= >= != ==
+                                type = (const char *) message->start;
+                                length = 2;
+                            }
+                            break;
+                        case '@':
+                            if (message->start[0] == '+' || message->start[0] == '-') { // +@ -@
+                                type = (const char *) message->start;
+                                length = 2;
+                            }
+                            break;
+                        case '*':
+                            if (message->start[0] == '*') { // **
+                                type = (const char *) message->start;
+                                length = 2;
+                            }
+                            break;
+                    }
+                    break;
+                case 3:
+                    if (memcmp(message->start, "<=>", 3) == 0) {
+                        type = "<=>";
+                        length = 3;
+                    }
+                    break;
+            }
+
+            break;
+        }
+        case PM_CONSTANT_PATH_NODE:
+            type = "::";
+            length = 2;
+            break;
+        case PM_CONSTANT_READ_NODE:
+            type = "a constant";
+            length = 10;
+            break;
+        case PM_DEFINED_NODE:
+            type = "defined?";
+            length = 8;
+            break;
+        case PM_FALSE_NODE:
+            type = "false";
+            length = 5;
+            break;
+        case PM_FLOAT_NODE:
+        case PM_IMAGINARY_NODE:
+        case PM_INTEGER_NODE:
+        case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
+        case PM_INTERPOLATED_STRING_NODE:
+        case PM_RATIONAL_NODE:
+        case PM_REGULAR_EXPRESSION_NODE:
+        case PM_SOURCE_ENCODING_NODE:
+        case PM_SOURCE_FILE_NODE:
+        case PM_SOURCE_LINE_NODE:
+        case PM_STRING_NODE:
+        case PM_SYMBOL_NODE:
+            type = "a literal";
+            length = 9;
+            break;
+        case PM_NIL_NODE:
+            type = "nil";
+            length = 3;
+            break;
+        case PM_RANGE_NODE: {
+            const pm_range_node_t *cast = (const pm_range_node_t *) node;
+
+            if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
+                type = "...";
+                length = 3;
+            } else {
+                type = "..";
+                length = 2;
+            }
+
+            break;
+        }
+        case PM_SELF_NODE:
+            type = "self";
+            length = 4;
+            break;
+        case PM_TRUE_NODE:
+            type = "true";
+            length = 4;
+            break;
+        default:
+            break; // every other kind of node is not warned about
+    }
+
+    if (type != NULL) {
+        PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type); // length first, then the text
+    }
+}
+
+/**
+ * Warn about each "void" statement in the list, skipping the last if last_value.
+ */
+static void
+pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
+    assert(node->body.size > 0);
+    size_t limit = node->body.size;
+    if (last_value) limit--;
+    for (size_t index = 0; index < limit; index++) {
+        pm_void_statement_check(parser, node->body.nodes[index]);
+    }
+}
+
+/**
+ * When we're handling the predicate of a conditional, we need to know our
+ * context in order to determine the kind of warning we should deliver to the
+ * user.
+ */
+typedef enum {
+    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL, // literal warnings describe a "condition"
+    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP, // literal warnings describe a "flip-flop"
+    PM_CONDITIONAL_PREDICATE_TYPE_NOT // no literal warning is emitted
+} pm_conditional_predicate_type_t;
+
+/**
+ * Add a warning for a literal used as a predicate, except for the NOT type.
+ */
+static void
+pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
+    // The second format argument names the context that the literal is in.
+    const char *context = NULL;
+
+    if (type == PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL) {
+        context = "condition";
+    } else if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
+        context = "flip-flop";
+    }
+
+    if (context != NULL) PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, context);
+}
+
+/**
+ * Return true if the value being written within the predicate of a conditional
+ * is a literal value. Arrays and hashes count if all of their contents do.
+ */
+static bool
+pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_ARRAY_NODE: {
+            if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
+
+            // Otherwise every element has to be a literal in its own right.
+            const pm_array_node_t *array = (const pm_array_node_t *) node;
+            for (size_t index = 0; index < array->elements.size; index++) {
+                const pm_node_t *element = array->elements.nodes[index];
+                if (!pm_conditional_predicate_warn_write_literal_p(element)) return false;
+            }
+            return true;
+        }
+        case PM_HASH_NODE: {
+            if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
+
+            // Otherwise every element has to be a pair of two literals.
+            const pm_hash_node_t *hash = (const pm_hash_node_t *) node;
+            for (size_t index = 0; index < hash->elements.size; index++) {
+                const pm_node_t *element = hash->elements.nodes[index];
+                if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
+                const pm_assoc_node_t *pair = (const pm_assoc_node_t *) element;
+                if (!pm_conditional_predicate_warn_write_literal_p(pair->key)) return false;
+                if (!pm_conditional_predicate_warn_write_literal_p(pair->value)) return false;
+            }
+            return true;
+        }
+        case PM_FALSE_NODE:
+        case PM_FLOAT_NODE:
+        case PM_IMAGINARY_NODE:
+        case PM_INTEGER_NODE:
+        case PM_NIL_NODE:
+        case PM_RATIONAL_NODE:
+        case PM_REGULAR_EXPRESSION_NODE:
+        case PM_SOURCE_ENCODING_NODE:
+        case PM_SOURCE_FILE_NODE:
+        case PM_SOURCE_LINE_NODE:
+        case PM_STRING_NODE:
+        case PM_SYMBOL_NODE:
+        case PM_TRUE_NODE:
+            return true;
+        default:
+            return false;
+    }
+}
+
+/**
+ * Add a warning to the parser if the value that is being written inside of a
+ * predicate to a conditional is a literal. The diagnostic depends on the version.
+ */
+static inline void
+pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
+    if (!pm_conditional_predicate_warn_write_literal_p(node)) return;
+    const bool legacy = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3;
+    pm_parser_warn_node(parser, node, legacy ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
+}
+
+/**
+ * The predicate of conditional nodes can change what would otherwise be regular
+ * nodes into specialized nodes. The conversion is done in place by overwriting
+ * the node's type. For example:
+ *
+ * if foo .. bar         => RangeNode becomes FlipFlopNode
+ * if foo and bar .. baz => RangeNode becomes FlipFlopNode
+ * if /foo/              => RegularExpressionNode becomes MatchLastLineNode
+ * if /foo #{bar}/       => InterpolatedRegularExpressionNode becomes InterpolatedMatchLastLineNode
+ *
+ * We also warn if a literal is used as, or written inside of, the predicate.
+ */
+static void
+pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_AND_NODE: {
+            pm_and_node_t *cast = (pm_and_node_t *) node;
+            pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+            pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+            break;
+        }
+        case PM_OR_NODE: {
+            pm_or_node_t *cast = (pm_or_node_t *) node;
+            pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+            pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+            break;
+        }
+        case PM_PARENTHESES_NODE: { // look through to a lone statement, keeping the caller's type
+            pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
+
+            if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
+                pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
+                if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
+            }
+
+            break;
+        }
+        case PM_BEGIN_NODE: { // same treatment as parentheses: only a lone statement is inspected
+            pm_begin_node_t *cast = (pm_begin_node_t *) node;
+            if (cast->statements != NULL) {
+                pm_statements_node_t *statements = cast->statements;
+                if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
+            }
+            break;
+        }
+        case PM_RANGE_NODE: {
+            pm_range_node_t *cast = (pm_range_node_t *) node;
+            // Both endpoints (when present) are checked as flip-flop operands.
+            if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
+            if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
+
+            // Here we change the range node into a flip flop node. We can do
+            // this since the nodes are exactly the same except for the type.
+            // We're only asserting against the size when we should probably
+            // assert against the entire layout, but we'll assume tests will
+            // catch this.
+            assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
+            node->type = PM_FLIP_FLOP_NODE;
+
+            break;
+        }
+        case PM_REGULAR_EXPRESSION_NODE:
+            // Here we change the regular expression node into a match last line
+            // node. We can do this since the nodes are exactly the same except
+            // for the type.
+            assert(sizeof(pm_regular_expression_node_t) == sizeof(pm_match_last_line_node_t));
+            node->type = PM_MATCH_LAST_LINE_NODE;
+
+            if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
+                pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
+            }
+
+            break;
+        case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
+            // Here we change the interpolated regular expression node into an
+            // interpolated match last line node. We can do this since the nodes
+            // are exactly the same except for the type.
+            assert(sizeof(pm_interpolated_regular_expression_node_t) == sizeof(pm_interpolated_match_last_line_node_t));
+            node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
+
+            if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
+                pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
+            }
+
+            break;
+        case PM_INTEGER_NODE: // integers get a dedicated warning when they are flip-flop operands
+            if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
+                if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
+                    pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
+                }
+            } else {
+                pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
+            }
+            break;
+        case PM_STRING_NODE:
+        case PM_SOURCE_FILE_NODE:
+        case PM_INTERPOLATED_STRING_NODE:
+            pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
+            break;
+        case PM_SYMBOL_NODE:
+        case PM_INTERPOLATED_SYMBOL_NODE:
+            pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
+            break;
+        case PM_SOURCE_LINE_NODE:
+        case PM_SOURCE_ENCODING_NODE:
+        case PM_FLOAT_NODE:
+        case PM_RATIONAL_NODE:
+        case PM_IMAGINARY_NODE:
+            pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
+            break;
+        case PM_CLASS_VARIABLE_WRITE_NODE: // for every write node below, it is the written value that gets checked
+            pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
+            break;
+        case PM_CONSTANT_WRITE_NODE:
+            pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
+            break;
+        case PM_GLOBAL_VARIABLE_WRITE_NODE:
+            pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
+            break;
+        case PM_INSTANCE_VARIABLE_WRITE_NODE:
+            pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
+            break;
+        case PM_LOCAL_VARIABLE_WRITE_NODE:
+            pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
+            break;
+        case PM_MULTI_WRITE_NODE:
+            pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
+            break;
+        default: // all other node types are left alone
+            break;
+    }
+}
+
+/**
+ * In a lot of places in the tree you can have tokens that are not provided but
+ * that do not cause an error (for example, a method call without parentheses).
+ * This builds the placeholder used in those cases: a zero-width token of the
+ * "not provided" type anchored at parser->start. For example:
+ *     pm_token_t token = not_provided(parser);
+ */
+static inline pm_token_t
+not_provided(pm_parser_t *parser) {
+    pm_token_t token = { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
+    return token;
+}
+
+/**
+ * This is a special out parameter to the parse_arguments_list function that
+ * bundles the opening and closing delimiters, and any block, together with
+ * the arguments since that combination is so common. It is handy to use when
+ * passing argument information to one of the call node creation functions.
+ */
+typedef struct {
+    /** The optional location of the opening parenthesis or bracket. */
+    pm_location_t opening_loc;
+
+    /** The lazily-allocated optional arguments node (NULL when there is none). */
+    pm_arguments_node_t *arguments;
+
+    /** The optional location of the closing parenthesis or bracket (start is NULL when absent). */
+    pm_location_t closing_loc;
+
+    /** The optional block attached to the call (NULL when there is none). */
+    pm_node_t *block;
+
+    /** The flag indicating whether this arguments list has a forwarding argument. */
+    bool has_forwarding;
+} pm_arguments_t;
+
+/**
+ * Retrieve the end location of a `pm_arguments_t` object. With a block, this is
+ * whichever of the block's end and the closing token's end (when one is
+ * present) comes later. Without a block, it is the closing token's end when
+ * present, and otherwise the end of the arguments node when there is one.
+ */
+static inline const uint8_t *
+pm_arguments_end(pm_arguments_t *arguments) {
+    const bool closed = (arguments->closing_loc.start != NULL);
+    const uint8_t *closing_end = arguments->closing_loc.end;
+
+    if (arguments->block == NULL) {
+        // With nothing else to go on, the closing location's end is returned as is.
+        if (closed || arguments->arguments == NULL) return closing_end;
+        return arguments->arguments->base.location.end;
+    }
+
+    const uint8_t *block_end = arguments->block->location.end;
+    return (closed && closing_end > block_end) ? closing_end : block_end;
+}
+
+/**
+ * Check that we're not about to attempt to attach a brace block to a call that
+ * has arguments without parentheses. If we are, a syntax error is added to the
+ * parser on the block node.
+ */
+static void
+pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
+    const pm_arguments_node_t *list = arguments->arguments;
+
+    // There is nothing to check when the call has no arguments, or when the
+    // arguments were given a closing location.
+    if (list == NULL || arguments->closing_loc.start != NULL) return;
+
+    // A single parenthesized argument is the one exception, as in:
+    //
+    //     foo (1) {}
+    //
+    // Here the block belongs to the call even though it looks like it is
+    // attached to the argument, so no error is reported.
+    const bool lone_parentheses = (
+        (list->arguments.size == 1) &&
+        PM_NODE_TYPE_P(list->arguments.nodes[0], PM_PARENTHESES_NODE)
+    );
+
+    // Anything else is a syntax error.
+    if (!lone_parentheses) pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
+}
+
+/******************************************************************************/
+/* Basic character checks                                                     */
+/******************************************************************************/
+
+/**
+ * Returns the width of the character at b if it can start an identifier, or 0
+ * if it cannot (or if there are no bytes left to read).
+ *
+ * This function is used extremely frequently to lex all of the identifiers in a
+ * source file, so it's important that it be as fast as possible. For this
+ * reason we have the encoding_changed boolean to check if we need to go through
+ * the function pointer or can just directly use the UTF-8 functions.
+ */
+static inline size_t
+char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) return 0;
+
+    if (!parser->encoding_changed) {
+        // Fast path: non-ASCII bytes defer to pm_encoding_utf_8_char_width,
+        // and ASCII bytes are underscores or looked up in the unicode table.
+        if (*b >= 0x80) return pm_encoding_utf_8_char_width(b, n);
+        if (*b == '_') return 1;
+        return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
+    }
+
+    // Slow path: go through the callbacks of the parser's encoding.
+    size_t width = parser->encoding->alpha_char(b, n);
+    if (width != 0) return width;
+    if (*b == '_') return 1;
+
+    return (*b >= 0x80) ? parser->encoding->char_width(b, n) : 0;
+}
+
+/**
+ * Similar to char_is_identifier but this function assumes that the encoding
+ * has not been changed, so it never goes through the parser's encoding.
+ */
+static inline size_t
+char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) return 0;
+    if (*b >= 0x80) return pm_encoding_utf_8_char_width(b, n);
+
+    // Only ASCII bytes remain: underscores are accepted, and everything else
+    // is looked up in the unicode table.
+    if (*b == '_') return 1;
+    return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
+}
+
+/**
+ * Returns the width of the character at b if it can continue an identifier, or
+ * 0 if it cannot (or if there are no bytes left to read).
+ *
+ * This is used extremely frequently to lex the remainder of every identifier
+ * once its first character has been found, so it's important that it be fast.
+ */
+static inline size_t
+char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
+    if (n <= 0) return 0;
+
+    // When the encoding has not been changed we can skip the function
+    // pointers entirely.
+    if (!parser->encoding_changed) return char_is_identifier_utf8(b, n);
+
+    size_t width = parser->encoding->alnum_char(b, n);
+    if (width != 0) return width;
+
+    // The encoding did not report an alphanumeric character, but underscores
+    // and bytes outside of the ASCII range are still accepted.
+    if (*b == '_') return 1;
+    if (*b >= 0x80) return parser->encoding->char_width(b, n);
+
+    return 0;
+}
+
+// Here we're defining a bitmap of the punctuation characters that are allowed
+// in global names: bit (c % 32) of word (c / 32 - 1) is set for each such c.
+// This is used to quickly check whether the character after a $ is valid.
+#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
+#define PUNCT(idx) ( \
+                BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
+                BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
+                BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
+                BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
+                BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
+                BIT('0', idx))
+
+const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
+
+#undef BIT
+#undef PUNCT
+
+static inline bool
+char_is_global_name_punctuation(const uint8_t b) {
+    // Only bytes above 0x20 and up to 0x7e are looked up in the bitmap.
+    if (b <= 0x20 || b > 0x7e) return false;
+    const unsigned int code = b;
+    return ((pm_global_name_punctuation_hash[(code - 0x20) / 32] >> (code % 32)) & 1) != 0;
+}
+
+static inline bool
+token_is_setter_name(pm_token_t *token) {
+    switch (token->type) {
+        case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: return true;
+        case PM_TOKEN_IDENTIFIER: // needs at least one byte before a trailing '='
+            return (token->end - token->start >= 2) && (token->end[-1] == '=');
+        default: return false;
+    }
+}
+
+/**
+ * Returns true if the length bytes at source spell out one of the keywords.
+ */
+static bool
+pm_local_is_keyword(const char *source, size_t length) {
+#define IS(keyword) (memcmp(source, keyword, length) == 0)
+
+    switch (length) {
+        case 2:
+            switch (source[0]) {
+                case 'd': return IS("do");
+                case 'i': return IS("if") || IS("in");
+                case 'o': return IS("or");
+                default: return false;
+            }
+        case 3:
+            switch (source[0]) {
+                case 'a': return IS("and");
+                case 'd': return IS("def");
+                case 'e': return IS("end");
+                case 'f': return IS("for");
+                case 'n': return IS("nil") || IS("not");
+                default: return false;
+            }
+        case 4:
+            switch (source[0]) {
+                case 'c': return IS("case");
+                case 'e': return IS("else");
+                case 'n': return IS("next");
+                case 'r': return IS("redo");
+                case 's': return IS("self");
+                case 't': return IS("then") || IS("true");
+                case 'w': return IS("when");
+                default: return false;
+            }
+        case 5:
+            switch (source[0]) {
+                case 'a': return IS("alias");
+                case 'b': return IS("begin") || IS("break");
+                case 'c': return IS("class");
+                case 'e': return IS("elsif");
+                case 'f': return IS("false");
+                case 'r': return IS("retry");
+                case 's': return IS("super");
+                case 'u': return IS("undef") || IS("until");
+                case 'w': return IS("while");
+                case 'y': return IS("yield");
+                default: return false;
+            }
+        case 6:
+            switch (source[0]) {
+                case 'e': return IS("ensure");
+                case 'm': return IS("module");
+                case 'r': return IS("rescue") || IS("return");
+                case 'u': return IS("unless");
+                default: return false;
+            }
+        case 8:
+            // Both candidates of this length share their first byte, so
+            // there is no point in dispatching on it.
+            return IS("__LINE__") || IS("__FILE__");
+        case 12:
+            // The only candidate of this length.
+            return IS("__ENCODING__");
+        default:
+            return false;
+    }
+
+#undef IS
+}
+
+/******************************************************************************/
+/* Node flag handling functions                                               */
+/******************************************************************************/
+
+/**
+ * Turn on the given flag in the flags of the given node.
+ */
+static inline void
+pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
+    node->flags = (pm_node_flags_t) (node->flags | flag);
+}
+
+/**
+ * Turn off the given flag in the flags of the given node.
+ */
+static inline void
+pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
+    node->flags = (pm_node_flags_t) (node->flags & ~flag);
+}
+
+/**
+ * Set the repeated parameter flag on one of the parameter node types below.
+ */
+static inline void
+pm_node_flag_set_repeated_parameter(pm_node_t *node) {
+    assert(PM_NODE_TYPE_P(node, PM_BLOCK_LOCAL_VARIABLE_NODE) ||
+            PM_NODE_TYPE_P(node, PM_BLOCK_PARAMETER_NODE) ||
+            PM_NODE_TYPE_P(node, PM_KEYWORD_REST_PARAMETER_NODE) ||
+            PM_NODE_TYPE_P(node, PM_OPTIONAL_KEYWORD_PARAMETER_NODE) ||
+            PM_NODE_TYPE_P(node, PM_OPTIONAL_PARAMETER_NODE) ||
+            PM_NODE_TYPE_P(node, PM_REQUIRED_KEYWORD_PARAMETER_NODE) ||
+            PM_NODE_TYPE_P(node, PM_REQUIRED_PARAMETER_NODE) ||
+            PM_NODE_TYPE_P(node, PM_REST_PARAMETER_NODE));
+
+    node->flags |= PM_PARAMETER_FLAGS_REPEATED_PARAMETER;
+}
+
+/******************************************************************************/
+/* Node creation functions                                                    */
+/******************************************************************************/
+
+/**
+ * When you have an encoding flag on a regular expression, it takes precedence
+ * over all of the previously set encoding flags. So we need to mask off any
+ * previously set encoding flags before setting the new one.
+ */
+#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
+
+/**
+ * Parse out the options for a regular expression from its closing token.
+ */
+static inline pm_node_flags_t
+pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
+    pm_node_flags_t flags = 0;
+
+    if (closing->type == PM_TOKEN_REGEXP_END) { // any other closing token type yields no flags
+        pm_buffer_t unknown_flags = { 0 };
+
+        for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) { // starts one byte into the token
+            switch (*flag) {
+                case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
+                case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
+                case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
+                case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
+                // Encoding options: each one replaces any encoding flag set by an earlier option.
+                case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
+                case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
+                case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
+                case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
+
+                default: pm_buffer_append_byte(&unknown_flags, *flag); // collected for the error reported below
+            }
+        }
+
+        size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
+        if (unknown_flags_length != 0) { // all unknown option bytes are reported in a single error
+            const char *word = unknown_flags_length >= 2 ? "options" : "option";
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
+        }
+        pm_buffer_free(&unknown_flags);
+    }
+
+    return flags;
+}
+
+#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
+
+static pm_statements_node_t *
+pm_statements_node_create(pm_parser_t *parser);
+
+static void
+pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
+
+static size_t
+pm_statements_node_body_length(pm_statements_node_t *node);
+
+/**
+ * Allocate memory for a node through xcalloc, aborting the process on failure.
+ * This is also the place to extend when we implement our own arena allocation.
+ */
+static inline void *
+pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
+    void *memory = xcalloc(1, size);
+    if (memory == NULL) {
+        fprintf(stderr, "Failed to allocate %zu bytes\n", size);
+        abort();
+    }
+    return memory;
+}
+
+#define PM_NODE_ALLOC(parser_, type_) (type_ *) pm_node_alloc(parser_, sizeof(type_))
+#define PM_NODE_INIT(parser_, type_, flags_, start_, end_) (pm_node_t) { \
+    .type = (type_), \
+    .flags = (flags_), \
+    .node_id = ++(parser_)->node_id, \
+    .location = { .start = (start_), .end = (end_) } \
+}
+// Each use of PM_NODE_INIT increments the parser's node_id. The variants below only differ in where the location comes from.
+#define PM_NODE_INIT_UNSET(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, NULL, NULL)
+#define PM_NODE_INIT_BASE(parser_, type_, flags_) PM_NODE_INIT(parser_, type_, flags_, (parser_)->start, (parser_)->start)
+#define PM_NODE_INIT_TOKEN(parser_, type_, flags_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_TOKEN_END(token_))
+#define PM_NODE_INIT_NODE(parser_, type_, flags_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_NODE_END(node_))
+// These variants span from the start of their first token/node argument to the end of their second.
+#define PM_NODE_INIT_TOKENS(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(left_), PM_TOKEN_END(right_))
+#define PM_NODE_INIT_NODES(parser_, type_, flags_, left_, right_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(left_), PM_NODE_END(right_))
+#define PM_NODE_INIT_TOKEN_NODE(parser_, type_, flags_, token_, node_) PM_NODE_INIT(parser_, type_, flags_, PM_TOKEN_START(token_), PM_NODE_END(node_))
+#define PM_NODE_INIT_NODE_TOKEN(parser_, type_, flags_, node_, token_) PM_NODE_INIT(parser_, type_, flags_, PM_NODE_START(node_), PM_TOKEN_END(token_))
+
+/**
+ * Allocate and initialize a new MissingNode node covering start through end.
+ */
+static pm_missing_node_t *
+pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    pm_missing_node_t *missing = PM_NODE_ALLOC(parser, pm_missing_node_t);
+
+    *missing = (pm_missing_node_t) {
+        .base = PM_NODE_INIT(parser, PM_MISSING_NODE, 0, start, end)
+    };
+
+    return missing;
+}
+
+/**
+ * Allocate a new AliasGlobalVariableNode that spans from keyword to old_name.
+ */
+static pm_alias_global_variable_node_t *
+pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
+    assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
+    pm_alias_global_variable_node_t *alias = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
+
+    *alias = (pm_alias_global_variable_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_GLOBAL_VARIABLE_NODE, 0, keyword, old_name),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .old_name = old_name,
+        .new_name = new_name
+    };
+
+    return alias;
+}
+
+/**
+ * Allocate a new AliasMethodNode that spans from keyword to old_name.
+ */
+static pm_alias_method_node_t *
+pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
+    assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
+    pm_alias_method_node_t *alias = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
+
+    *alias = (pm_alias_method_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_ALIAS_METHOD_NODE, 0, keyword, old_name),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .old_name = old_name,
+        .new_name = new_name
+    };
+
+    return alias;
+}
+
+/**
+ * Allocate a new AlternationPatternNode that spans from left to right.
+ */
+static pm_alternation_pattern_node_t *
+pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
+    pm_alternation_pattern_node_t *alternation = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
+
+    *alternation = (pm_alternation_pattern_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_ALTERNATION_PATTERN_NODE, 0, left, right),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .left = left,
+        .right = right
+    };
+
+    return alternation;
+}
+
+/**
+ * Allocate and initialize a new and node that spans from left to right.
+ */
+static pm_and_node_t *
+pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
+    pm_assert_value_expression(parser, left);
+
+    pm_and_node_t *conjunction = PM_NODE_ALLOC(parser, pm_and_node_t);
+
+    *conjunction = (pm_and_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_AND_NODE, 0, left, right),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .left = left,
+        .right = right
+    };
+
+    return conjunction;
+}
+
+/**
+ * Allocate and initialize a new, empty arguments node.
+ */
+static pm_arguments_node_t *
+pm_arguments_node_create(pm_parser_t *parser) {
+    pm_arguments_node_t *list = PM_NODE_ALLOC(parser, pm_arguments_node_t);
+
+    *list = (pm_arguments_node_t) {
+        .arguments = { 0 },
+        .base = PM_NODE_INIT_BASE(parser, PM_ARGUMENTS_NODE, 0)
+    };
+
+    return list;
+}
+
+/**
+ * Return the number of arguments currently held by the given arguments node.
+ */
+static size_t
+pm_arguments_node_size(pm_arguments_node_t *node) {
+    return node->arguments.size;
+}
+
+/**
+ * Append an argument to an arguments node. The location of the node is grown
+ * to cover the argument, and the splat flags on the node are updated when the
+ * argument is a splat.
+ */
+static void
+pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
+    pm_location_t *location = &node->base.location;
+
+    // The first argument fixes the start; the end only ever moves forward.
+    if (pm_arguments_node_size(node) == 0) location->start = argument->location.start;
+    if (location->end < argument->location.end) location->end = argument->location.end;
+
+    pm_node_list_append(&node->arguments, argument);
+    if (!PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) return;
+
+    // The first splat sets CONTAINS_SPLAT, and any later splat additionally
+    // sets CONTAINS_MULTIPLE_SPLATS.
+    pm_node_flags_t flag = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT;
+    if (PM_NODE_FLAG_P(node, flag)) flag = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS;
+
+    pm_node_flag_set(UP(node), flag);
+}
+
+/**
+ * Allocate a new, empty ArrayNode node that starts out as a static literal.
+ */
+static pm_array_node_t *
+pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
+    pm_array_node_t *array = PM_NODE_ALLOC(parser, pm_array_node_t);
+    // Both locations are taken from the opening token at this point.
+    *array = (pm_array_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_ARRAY_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
+        .elements = { 0 },
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening)
+    };
+
+    return array;
+}
+
+/**
+ * Append an element to an array node.
+ */
+static inline void
+pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
+    // Without an opening token, the first element marks where the array starts.
+    const bool starts_here = (node->elements.size == 0) && (node->opening_loc.start == NULL);
+    if (starts_here) node->base.location.start = element->location.start;
+
+    pm_node_list_append(&node->elements, element);
+    node->base.location.end = element->location.end;
+
+    // Arrays, hashes, ranges, and anything not flagged as a static literal
+    // stop the array itself from being a static literal.
+    const bool container = (
+        PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE)
+    );
+    if (container || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+
+    // Splats are tracked with their own flag.
+    if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
+}
+
+/**
+ * Record the closing token of an array node and end the node where it ends.
+ */
+static void
+pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
+    assert(closing->type == PM_TOKEN_NOT_PROVIDED || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_BRACKET_RIGHT);
+    node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+    node->base.location.end = closing->end;
+}
+
+/**
+ * Allocate and initialize a new array pattern node. The node list given in the
+ * nodes parameter is guaranteed to have at least two nodes.
+ */
+static pm_array_pattern_node_t *
+pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
+    pm_array_pattern_node_t *pattern = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
+
+    *pattern = (pm_array_pattern_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_ARRAY_PATTERN_NODE, 0, nodes->nodes[0], nodes->nodes[nodes->size - 1]),
+        .constant = NULL,
+        .requireds = { 0 },
+        .rest = NULL,
+        .posts = { 0 },
+        .opening_loc = { 0 },
+        .closing_loc = { 0 }
+    };
+
+    // Children are copied over one pointer at a time. Everything before the
+    // first splat or implicit rest is required; that node becomes the rest;
+    // everything after it (further splats included) is a post.
+    pm_node_list_t *target = &pattern->requireds;
+    pm_node_t *child;
+
+    PM_NODE_LIST_FOREACH(nodes, index, child) {
+        const bool restlike = PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE);
+        if (restlike && target == &pattern->requireds) {
+            pattern->rest = child;
+            target = &pattern->posts;
+        } else {
+            pm_node_list_append(target, child);
+        }
+    }
+
+    return pattern;
+}
+
+/**
+ * Allocate a new array pattern node that consists of a single rest node.
+ */
+static pm_array_pattern_node_t *
+pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
+    pm_array_pattern_node_t *pattern = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
+
+    *pattern = (pm_array_pattern_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_ARRAY_PATTERN_NODE, 0, rest),
+        .rest = rest,
+        .constant = NULL,
+        .requireds = { 0 },
+        .posts = { 0 },
+        .opening_loc = { 0 },
+        .closing_loc = { 0 }
+    };
+
+    return pattern;
+}
+
+/**
+ * Allocate and initialize a new array pattern node from a constant and opening
+ * and closing tokens. The node spans from the constant to the closing token.
+ */
+static pm_array_pattern_node_t *
+pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_array_pattern_node_t *pattern = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
+
+    *pattern = (pm_array_pattern_node_t) {
+        .base = PM_NODE_INIT_NODE_TOKEN(parser, PM_ARRAY_PATTERN_NODE, 0, constant, closing),
+        .constant = constant,
+        .requireds = { 0 },
+        .rest = NULL,
+        .posts = { 0 },
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return pattern;
+}
+
+/**
+ * Allocate and initialize a new array pattern node that has no children and
+ * spans from the opening token to the closing token.
+ */
+static pm_array_pattern_node_t *
+pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_array_pattern_node_t *pattern = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
+
+    *pattern = (pm_array_pattern_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_ARRAY_PATTERN_NODE, 0, opening, closing),
+        .constant = NULL,
+        .requireds = { 0 },
+        .rest = NULL,
+        .posts = { 0 },
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return pattern;
+}
+
+static inline void
+pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
+    pm_node_list_append(&node->requireds, inner); // only the requireds list changes; the node's location is left as is
+}
+
+/**
+ * Allocate and initialize a new assoc node.
+ */
+static pm_assoc_node_t *
+pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
+    pm_assoc_node_t *assoc = PM_NODE_ALLOC(parser, pm_assoc_node_t);
+
+    // The node ends at the value when it extends past the key, otherwise at
+    // the operator when one was provided, and otherwise at the key.
+    const uint8_t *end = key->location.end;
+    if (value != NULL && value->location.end > end) {
+        end = value->location.end;
+    } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
+        end = operator->end;
+    }
+
+    // Hash string keys will be frozen, so we flag them as frozen static
+    // literals here. That lets the compiler pick them up, and factors them
+    // into the static literal check below.
+    if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
+        key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
+    }
+
+    // The assoc is a static literal only when both sides are, and neither
+    // side is an array, hash, or range. A missing value rules it out too.
+    const bool key_excluded = PM_NODE_TYPE_P(key, PM_ARRAY_NODE) || PM_NODE_TYPE_P(key, PM_HASH_NODE) || PM_NODE_TYPE_P(key, PM_RANGE_NODE);
+    const bool value_excluded = value == NULL || PM_NODE_TYPE_P(value, PM_ARRAY_NODE) || PM_NODE_TYPE_P(value, PM_HASH_NODE) || PM_NODE_TYPE_P(value, PM_RANGE_NODE);
+
+    pm_node_flags_t flags = 0;
+    if (!key_excluded && !value_excluded) {
+        flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
+    }
+
+    *assoc = (pm_assoc_node_t) {
+        .base = PM_NODE_INIT(parser, PM_ASSOC_NODE, flags, key->location.start, end),
+        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
+        .key = key,
+        .value = value
+    };
+
+    return assoc;
+}
+
+/**
+ * Allocate and initialize a new assoc splat node from a PM_TOKEN_USTAR_STAR
+ * operator token and a value that may be NULL.
+ */
+static pm_assoc_splat_node_t *
+pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
+    assert(operator->type == PM_TOKEN_USTAR_STAR);
+    pm_assoc_splat_node_t *splat = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
+
+    // With a value the base is initialized from the operator and the value,
+    // without one it is initialized from the operator alone.
+    *splat = (pm_assoc_splat_node_t) {
+        .base = (
+            (value != NULL)
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_ASSOC_SPLAT_NODE, 0, operator, value)
+            : PM_NODE_INIT_TOKEN(parser, PM_ASSOC_SPLAT_NODE, 0, operator)
+        ),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value
+    };
+
+    return splat;
+}
+
+/**
+ * Allocate and initialize a new BackReferenceReadNode node from a
+ * PM_TOKEN_BACK_REFERENCE token. The node's name is looked up from that token
+ * with pm_parser_constant_id_token.
+ */
+static pm_back_reference_read_node_t *
+pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    assert(name->type == PM_TOKEN_BACK_REFERENCE);
+    pm_back_reference_read_node_t *read = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
+
+    *read = (pm_back_reference_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_BACK_REFERENCE_READ_NODE, 0, name),
+        .name = pm_parser_constant_id_token(parser, name)
+    };
+
+    return read;
+}
+
+/**
+ * Allocate and initialize a new begin node from its keyword token and an
+ * optional statements node. The end keyword location starts out zeroed.
+ */
+static pm_begin_node_t *
+pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
+    pm_begin_node_t *begin = PM_NODE_ALLOC(parser, pm_begin_node_t);
+
+    // With statements the base is initialized from the keyword and the
+    // statements, without them it is initialized from the keyword alone.
+    *begin = (pm_begin_node_t) {
+        .base = (
+            (statements != NULL)
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_BEGIN_NODE, 0, begin_keyword, statements)
+            : PM_NODE_INIT_TOKEN(parser, PM_BEGIN_NODE, 0, begin_keyword)
+        ),
+        .statements = statements,
+        .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
+        .end_keyword_loc = { 0 }
+    };
+
+    return begin;
+}
+
+/**
+ * Attach a rescue clause to a begin node. The end of the begin node moves to
+ * the end of the clause, and so does its start when the begin node has no
+ * begin keyword location.
+ */
+static void
+pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
+    node->rescue_clause = rescue_clause;
+
+    // Without a begin keyword, the node starts where the rescue clause does.
+    if (node->begin_keyword_loc.start == NULL) {
+        node->base.location.start = rescue_clause->base.location.start;
+    }
+
+    node->base.location.end = rescue_clause->base.location.end;
+}
+
+/**
+ * Attach an else clause to a begin node and extend the end of the begin node
+ * to the end of that clause.
+ */
+static void
+pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
+    node->else_clause = else_clause;
+    node->base.location.end = else_clause->base.location.end;
+}
+
+/**
+ * Attach an ensure clause to a begin node and extend the end of the begin node
+ * to the end of that clause.
+ */
+static void
+pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
+    node->ensure_clause = ensure_clause;
+    node->base.location.end = ensure_clause->base.location.end;
+}
+
+/**
+ * Record the end keyword of a begin node and extend the end of the node to the
+ * end of that token. The token must be an `end` keyword or a missing token.
+ */
+static void
+pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
+    assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
+
+    node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
+    node->base.location.end = end_keyword->end;
+}
+
+/**
+ * Allocate and initialize a new BlockArgumentNode node from its operator token
+ * and an expression that may be NULL.
+ */
+static pm_block_argument_node_t *
+pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
+    pm_block_argument_node_t *argument = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
+
+    // With an expression the base is initialized from the operator and the
+    // expression, without one it is initialized from the operator alone.
+    *argument = (pm_block_argument_node_t) {
+        .base = (
+            (expression != NULL)
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator, expression)
+            : PM_NODE_INIT_TOKEN(parser, PM_BLOCK_ARGUMENT_NODE, 0, operator)
+        ),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .expression = expression
+    };
+
+    return argument;
+}
+
+/**
+ * Allocate and initialize a new BlockNode node. The opening and closing tokens
+ * initialize the base node and the two delimiter locations, and the locals
+ * list is copied by value into the node.
+ */
+static pm_block_node_t *
+pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
+    pm_block_node_t *block = PM_NODE_ALLOC(parser, pm_block_node_t);
+
+    *block = (pm_block_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_BLOCK_NODE, 0, opening, closing),
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
+        .locals = *locals,
+        .parameters = parameters,
+        .body = body
+    };
+
+    return block;
+}
+
+/**
+ * Allocate and initialize a new BlockParameterNode node from a name token that
+ * may be PM_TOKEN_NOT_PROVIDED and its operator token.
+ */
+static pm_block_parameter_node_t *
+pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
+    assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
+    pm_block_parameter_node_t *parameter = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
+
+    // With a name the base is initialized from the operator and the name,
+    // without one it is initialized from the operator alone.
+    *parameter = (pm_block_parameter_node_t) {
+        .base = (
+            (name->type != PM_TOKEN_NOT_PROVIDED)
+            ? PM_NODE_INIT_TOKENS(parser, PM_BLOCK_PARAMETER_NODE, 0, operator, name)
+            : PM_NODE_INIT_TOKEN(parser, PM_BLOCK_PARAMETER_NODE, 0, operator)
+        ),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
+        .name = pm_parser_optional_constant_id_token(parser, name)
+    };
+
+    return parameter;
+}
+
+/**
+ * Allocate and initialize a new BlockParametersNode node from an optional
+ * parameters node and an opening token that may be PM_TOKEN_NOT_PROVIDED. The
+ * closing location and the locals list start out empty.
+ */
+static pm_block_parameters_node_t *
+pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
+    pm_block_parameters_node_t *block_parameters = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
+
+    // The start prefers the opening token, then the parameters, then NULL.
+    const uint8_t *start = (
+        (opening->type != PM_TOKEN_NOT_PROVIDED) ? opening->start :
+        (parameters != NULL) ? parameters->base.location.start :
+        NULL
+    );
+
+    // The end prefers the parameters, then the opening token, then NULL.
+    const uint8_t *end = (
+        (parameters != NULL) ? parameters->base.location.end :
+        (opening->type != PM_TOKEN_NOT_PROVIDED) ? opening->end :
+        NULL
+    );
+
+    *block_parameters = (pm_block_parameters_node_t) {
+        .base = PM_NODE_INIT(parser, PM_BLOCK_PARAMETERS_NODE, 0, start, end),
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = { 0 },
+        .parameters = parameters,
+        .locals = { 0 }
+    };
+
+    return block_parameters;
+}
+
+/**
+ * Record the closing token of a BlockParametersNode node and extend the end of
+ * the node to the end of that token. The token must be a pipe, a right
+ * parenthesis, or a missing token.
+ */
+static void
+pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
+    assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
+
+    node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+    node->base.location.end = closing->end;
+}
+
+/**
+ * Allocate and initialize a new BlockLocalVariableNode node from its name
+ * token. The node's name is looked up from that token with
+ * pm_parser_constant_id_token.
+ */
+static pm_block_local_variable_node_t *
+pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    pm_block_local_variable_node_t *local = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
+
+    *local = (pm_block_local_variable_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_BLOCK_LOCAL_VARIABLE_NODE, 0, name),
+        .name = pm_parser_constant_id_token(parser, name)
+    };
+
+    return local;
+}
+
+/**
+ * Append a block-local variable to the locals of a BlockParametersNode node.
+ * The end of the node moves to the end of the local, and the start is set to
+ * the start of the local only when the node has no start yet.
+ */
+static void
+pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
+    pm_node_list_append(&node->locals, UP(local));
+
+    node->base.location.end = local->base.location.end;
+
+    if (node->base.location.start == NULL) {
+        node->base.location.start = local->base.location.start;
+    }
+}
+
+/**
+ * Allocate and initialize a new BreakNode node from the `break` keyword token
+ * and an arguments node that may be NULL.
+ */
+static pm_break_node_t *
+pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
+    assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
+    pm_break_node_t *brk = PM_NODE_ALLOC(parser, pm_break_node_t);
+
+    // With arguments the base is initialized from the keyword and the
+    // arguments, without them it is initialized from the keyword alone.
+    *brk = (pm_break_node_t) {
+        .base = (
+            (arguments != NULL)
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_BREAK_NODE, 0, keyword, arguments)
+            : PM_NODE_INIT_TOKEN(parser, PM_BREAK_NODE, 0, keyword)
+        ),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .arguments = arguments
+    };
+
+    return brk;
+}
+
+// There are certain flags that we want to use internally but don't want to
+// expose because they are not relevant beyond parsing. Therefore we'll define
+// them here and not define them in config.yml/a header file.
+//
+// NOTE(review): the call node flags are computed by shifting
+// (PM_CALL_NODE_FLAGS_LAST - 1) left by 1, 2, and 3 bits. Presumably that value
+// is the highest public call node flag, which would place these in the bits
+// directly above the public ones. Confirm against the generated header.
+static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
+
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
+
+/**
+ * Allocate and initialize a blank CallNode node carrying only the given flags.
+ * Every pointer is NULL, every location is zeroed, and the name is 0, so that
+ * the specialized constructors can fill in just the fields they need.
+ */
+static pm_call_node_t *
+pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
+    pm_call_node_t *call = PM_NODE_ALLOC(parser, pm_call_node_t);
+
+    *call = (pm_call_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_CALL_NODE, flags),
+        .name = 0,
+        .receiver = NULL,
+        .arguments = NULL,
+        .block = NULL,
+        .call_operator_loc = { 0 },
+        .message_loc = { 0 },
+        .opening_loc = { 0 },
+        .closing_loc = { 0 },
+        .equal_loc = { 0 }
+    };
+
+    return call;
+}
+
+/**
+ * Compute the ignore visibility flag for a call on the given receiver: the
+ * flag is set when the receiver is a self node and is 0 otherwise.
+ */
+static inline pm_node_flags_t
+pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
+    if (PM_NODE_TYPE_P(receiver, PM_SELF_NODE)) {
+        return PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY;
+    }
+
+    return 0;
+}
+
+/**
+ * Allocate and initialize a new CallNode node named `[]` for an index
+ * expression on the given receiver. The message location runs from the start
+ * of the opening location to the end of the closing location.
+ */
+static pm_call_node_t *
+pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
+    pm_assert_value_expression(parser, receiver);
+
+    // The call is only flagged as an index when there is no block at all or
+    // when the block is a block argument.
+    pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
+    if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
+        flags |= PM_CALL_NODE_FLAGS_INDEX;
+    }
+
+    pm_call_node_t *call = pm_call_node_create(parser, flags);
+
+    call->receiver = receiver;
+    call->arguments = arguments->arguments;
+    call->block = arguments->block;
+
+    call->opening_loc = arguments->opening_loc;
+    call->closing_loc = arguments->closing_loc;
+    call->message_loc.start = arguments->opening_loc.start;
+    call->message_loc.end = arguments->closing_loc.end;
+
+    call->base.location.start = receiver->location.start;
+    call->base.location.end = pm_arguments_end(arguments);
+
+    call->name = pm_parser_constant_id_constant(parser, "[]", 2);
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node for a binary expression. The
+ * operator token names the call, and the argument becomes the only entry of a
+ * freshly created arguments node.
+ */
+static pm_call_node_t *
+pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
+    pm_assert_value_expression(parser, receiver);
+    pm_assert_value_expression(parser, argument);
+
+    pm_call_node_t *call = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
+
+    call->receiver = receiver;
+    call->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+
+    // The call spans both operands, whichever of them starts first or ends
+    // last.
+    call->base.location.start = MIN(receiver->location.start, argument->location.start);
+    call->base.location.end = MAX(receiver->location.end, argument->location.end);
+
+    pm_arguments_node_t *operands = pm_arguments_node_create(parser);
+    pm_arguments_node_arguments_append(operands, argument);
+    call->arguments = operands;
+
+    call->name = pm_parser_constant_id_token(parser, operator);
+    return call;
+}
+
+// Forward declaration. pm_call_node_call_create uses the returned pointer as
+// the end of the range, starting at the message token, that names the call.
+static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
+
+/**
+ * Allocate and initialize a new CallNode node for a call with an explicit
+ * receiver, a call operator token, a message token, and parsed arguments.
+ */
+static pm_call_node_t *
+pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
+    pm_assert_value_expression(parser, receiver);
+
+    pm_call_node_t *call = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
+
+    // The call ends where the arguments end, or at the end of the message
+    // when the arguments have no end.
+    const uint8_t *arguments_end = pm_arguments_end(arguments);
+    call->base.location.start = receiver->location.start;
+    call->base.location.end = (arguments_end != NULL) ? arguments_end : message->end;
+
+    call->receiver = receiver;
+    call->arguments = arguments->arguments;
+    call->block = arguments->block;
+
+    call->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    call->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    call->opening_loc = arguments->opening_loc;
+    call->closing_loc = arguments->closing_loc;
+
+    if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
+        pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+    }
+
+    // When the message ends in `@`, as it does for `foo.~@`, the `@` is left
+    // out of the name in the same way it is for symbols.
+    call->name = pm_parser_constant_id_location(parser, message->start, parse_operator_symbol_name(message));
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node that was synthesized rather than
+ * parsed. Its location covers the parser's whole start..end range, and its
+ * call operator and message locations are NULL.
+ */
+static pm_call_node_t *
+pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
+    pm_call_node_t *call = pm_call_node_create(parser, 0);
+
+    call->receiver = receiver;
+    call->arguments = arguments;
+
+    call->base.location.start = parser->start;
+    call->base.location.end = parser->end;
+    call->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
+    call->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
+
+    call->name = pm_parser_constant_id_constant(parser, message, strlen(message));
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node for a receiver-less call to a
+ * method name that could not have been a local variable read. The node is
+ * created with the ignore visibility flag.
+ */
+static pm_call_node_t *
+pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
+    pm_call_node_t *call = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
+
+    call->arguments = arguments->arguments;
+    call->block = arguments->block;
+
+    call->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    call->opening_loc = arguments->opening_loc;
+    call->closing_loc = arguments->closing_loc;
+
+    call->base.location.start = message->start;
+    call->base.location.end = pm_arguments_end(arguments);
+
+    call->name = pm_parser_constant_id_token(parser, message);
+    return call;
+}
+
+/**
+ * Allocate and initialize a new synthesized receiver-less CallNode node with
+ * the given arguments and name. Its location is PM_LOCATION_NULL_VALUE and it
+ * is created with the ignore visibility flag.
+ */
+static pm_call_node_t *
+pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
+    pm_call_node_t *call = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
+
+    call->name = name;
+    call->arguments = arguments;
+    call->base.location = PM_LOCATION_NULL_VALUE(parser);
+
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node named `!` for a not expression.
+ * The receiver may be NULL, but only when the arguments have a closing
+ * location to end the node at.
+ */
+static pm_call_node_t *
+pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
+    pm_assert_value_expression(parser, receiver);
+
+    // A receiver that is present is treated as a conditional predicate and
+    // decides the ignore visibility flag. Without one there are no flags.
+    pm_node_flags_t flags = 0;
+    if (receiver != NULL) {
+        pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
+        flags = pm_call_node_ignore_visibility_flag(receiver);
+    }
+
+    pm_call_node_t *call = pm_call_node_create(parser, flags);
+
+    call->base.location.start = message->start;
+    if (arguments->closing_loc.start == NULL) {
+        assert(receiver != NULL);
+        call->base.location.end = receiver->location.end;
+    } else {
+        call->base.location.end = arguments->closing_loc.end;
+    }
+
+    call->receiver = receiver;
+    call->arguments = arguments->arguments;
+    call->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    call->opening_loc = arguments->opening_loc;
+    call->closing_loc = arguments->closing_loc;
+
+    call->name = pm_parser_constant_id_constant(parser, "!", 1);
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node named `call` for a call
+ * shorthand expression. No message location is recorded.
+ */
+static pm_call_node_t *
+pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
+    pm_assert_value_expression(parser, receiver);
+
+    pm_call_node_t *call = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
+
+    call->receiver = receiver;
+    call->arguments = arguments->arguments;
+    call->block = arguments->block;
+
+    call->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    call->opening_loc = arguments->opening_loc;
+    call->closing_loc = arguments->closing_loc;
+
+    call->base.location.start = receiver->location.start;
+    call->base.location.end = pm_arguments_end(arguments);
+
+    if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
+        pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+    }
+
+    call->name = pm_parser_constant_id_constant(parser, "call", 4);
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node for a unary operator expression.
+ * The node runs from the start of the operator to the end of the receiver and
+ * is named after the given NUL-terminated string.
+ */
+static pm_call_node_t *
+pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
+    pm_assert_value_expression(parser, receiver);
+
+    pm_call_node_t *call = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
+
+    call->receiver = receiver;
+    call->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+
+    call->base.location.start = operator->start;
+    call->base.location.end = receiver->location.end;
+
+    call->name = pm_parser_constant_id_constant(parser, name, strlen(name));
+    return call;
+}
+
+/**
+ * Allocate and initialize a new CallNode node for a receiver-less call to a
+ * method name that could also have been a local variable read. The message
+ * token provides the node location, the message location, and the name.
+ */
+static pm_call_node_t *
+pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
+    pm_call_node_t *call = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
+
+    call->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+    call->base.location = PM_LOCATION_TOKEN_VALUE(message);
+
+    call->name = pm_parser_constant_id_token(parser, message);
+    return call;
+}
+
+/**
+ * Returns true when this call may appear on the left-hand side of an operator
+ * assignment. That requires a message that starts like an identifier and does
+ * not end in `!` or `?`, and no opening location, arguments, or block.
+ */
+static inline bool
+pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
+    // Without a message there is nothing to inspect.
+    if (node->message_loc.start == NULL) return false;
+
+    // Names ending in `!` or `?` cannot be written to.
+    if (node->message_loc.end[-1] == '!') return false;
+    if (node->message_loc.end[-1] == '?') return false;
+
+    // The message has to begin with an identifier start character.
+    if (!char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start)) return false;
+
+    return (
+        (node->opening_loc.start == NULL) &&
+        (node->arguments == NULL) &&
+        (node->block == NULL)
+    );
+}
+
+/**
+ * Derive the read name from the write name by dropping its final byte (the
+ * '='). The shortened copy is inserted into the constant pool with
+ * pm_constant_pool_insert_owned. An empty write name yields the constant for
+ * the empty string.
+ */
+static void
+pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
+    pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
+
+    if (write_constant->length > 0) {
+        size_t read_length = write_constant->length - 1;
+
+        uint8_t *copy = (uint8_t *) xmalloc(read_length);
+        memcpy(copy, write_constant->start, read_length);
+
+        *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, copy, read_length);
+        return;
+    }
+
+    // The message can be missing because of a syntax error, which leaves the
+    // write name empty.
+    *read_name = pm_parser_constant_id_constant(parser, "", 0);
+}
+
+/**
+ * Allocate and initialize a new CallAndWriteNode node from a call node target,
+ * a `&&=` operator token, and a value. The target's receiver, locations, and
+ * name are moved into the new node and the target itself is freed.
+ */
+static pm_call_and_write_node_t *
+pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(target->block == NULL);
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+    pm_call_and_write_node_t *write = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
+
+    *write = (pm_call_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CALL_AND_WRITE_NODE, FL(target), target, value),
+        .receiver = target->receiver,
+        .value = value,
+        .read_name = 0,
+        .write_name = target->name,
+        .call_operator_loc = target->call_operator_loc,
+        .message_loc = target->message_loc,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    pm_call_write_read_name_init(parser, &write->read_name, &write->write_name);
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * When parsing as Ruby 3.4 or later, report an error for an index expression
+ * that has keyword arguments and another for one that has a block. Earlier
+ * versions are not checked.
+ */
+static void
+pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
+    if (parser->version < PM_OPTIONS_VERSION_CRUBY_3_4) return;
+
+    // Only the first keyword hash among the arguments is reported.
+    if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
+        pm_node_t *argument;
+        PM_NODE_LIST_FOREACH(&arguments->arguments, index, argument) {
+            if (PM_NODE_TYPE_P(argument, PM_KEYWORD_HASH_NODE)) {
+                pm_parser_err_node(parser, argument, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
+                break;
+            }
+        }
+    }
+
+    if (block != NULL) {
+        pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
+    }
+}
+
+/**
+ * Allocate and initialize a new IndexAndWriteNode node from a call node
+ * target, a `&&=` operator token, and a value. The target's arguments and
+ * block are checked first, then its children are moved into the new node and
+ * the target itself is freed.
+ */
+static pm_index_and_write_node_t *
+pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+    pm_index_and_write_node_t *write = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
+
+    pm_index_arguments_check(parser, target->arguments, target->block);
+
+    // Any block that is present must be a block argument for the cast below.
+    assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
+    *write = (pm_index_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INDEX_AND_WRITE_NODE, FL(target), target, value),
+        .receiver = target->receiver,
+        .arguments = target->arguments,
+        .block = (pm_block_argument_node_t *) target->block,
+        .value = value,
+        .call_operator_loc = target->call_operator_loc,
+        .opening_loc = target->opening_loc,
+        .closing_loc = target->closing_loc,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new CallOperatorWriteNode node from a call node
+ * target, an operator token, and a value. The binary operator is the operator
+ * token's source minus its final byte. The target's children are moved into
+ * the new node and the target itself is freed.
+ */
+static pm_call_operator_write_node_t *
+pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(target->block == NULL);
+    pm_call_operator_write_node_t *write = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
+
+    *write = (pm_call_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CALL_OPERATOR_WRITE_NODE, FL(target), target, value),
+        .receiver = target->receiver,
+        .value = value,
+        .read_name = 0,
+        .write_name = target->name,
+        .call_operator_loc = target->call_operator_loc,
+        .message_loc = target->message_loc,
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    pm_call_write_read_name_init(parser, &write->read_name, &write->write_name);
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new IndexOperatorWriteNode node from a call node
+ * target, an operator token, and a value. The binary operator is the operator
+ * token's source minus its final byte. The target's arguments and block are
+ * checked first, then its children are moved into the new node and the target
+ * itself is freed.
+ */
+static pm_index_operator_write_node_t *
+pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_index_operator_write_node_t *write = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
+
+    pm_index_arguments_check(parser, target->arguments, target->block);
+
+    // Any block that is present must be a block argument for the cast below.
+    assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
+    *write = (pm_index_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OPERATOR_WRITE_NODE, FL(target), target, value),
+        .receiver = target->receiver,
+        .arguments = target->arguments,
+        .block = (pm_block_argument_node_t *) target->block,
+        .value = value,
+        .call_operator_loc = target->call_operator_loc,
+        .opening_loc = target->opening_loc,
+        .closing_loc = target->closing_loc,
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
+        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new CallOrWriteNode node from a call node target,
+ * a `||=` operator token, and a value. The target's receiver, locations, and
+ * name are moved into the new node and the target itself is freed.
+ */
+static pm_call_or_write_node_t *
+pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(target->block == NULL);
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+    pm_call_or_write_node_t *write = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
+
+    *write = (pm_call_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CALL_OR_WRITE_NODE, FL(target), target, value),
+        .receiver = target->receiver,
+        .value = value,
+        .read_name = 0,
+        .write_name = target->name,
+        .call_operator_loc = target->call_operator_loc,
+        .message_loc = target->message_loc,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    pm_call_write_read_name_init(parser, &write->read_name, &write->write_name);
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new IndexOrWriteNode node from a call node target,
+ * a `||=` operator token, and a value. The target's arguments and block are
+ * checked first, then its children are moved into the new node and the target
+ * itself is freed.
+ */
+static pm_index_or_write_node_t *
+pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+    pm_index_or_write_node_t *write = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
+
+    pm_index_arguments_check(parser, target->arguments, target->block);
+
+    // Any block that is present must be a block argument for the cast below.
+    assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
+    *write = (pm_index_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INDEX_OR_WRITE_NODE, FL(target), target, value),
+        .receiver = target->receiver,
+        .arguments = target->arguments,
+        .block = (pm_block_argument_node_t *) target->block,
+        .value = value,
+        .call_operator_loc = target->call_operator_loc,
+        .opening_loc = target->opening_loc,
+        .closing_loc = target->closing_loc,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new CallTargetNode node that takes over the
+ * receiver, name, and locations of an existing call node. The call node itself
+ * is freed.
+ */
+static pm_call_target_node_t *
+pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
+    pm_call_target_node_t *call_target = PM_NODE_ALLOC(parser, pm_call_target_node_t);
+
+    *call_target = (pm_call_target_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_CALL_TARGET_NODE, FL(target), target),
+        .receiver = target->receiver,
+        .name = target->name,
+        .call_operator_loc = target->call_operator_loc,
+        .message_loc = target->message_loc
+    };
+
+    // An invalid syntax tree can reach this point without a call operator,
+    // even though that location is required. Substitute an empty location at
+    // the start of the target so that the syntax tree remains valid.
+    if (call_target->call_operator_loc.start == NULL) {
+        const uint8_t *target_start = target->base.location.start;
+
+        call_target->call_operator_loc = (pm_location_t) {
+            .start = target_start,
+            .end = target_start
+        };
+    }
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return call_target;
+}
+
+/**
+ * Allocate and initialize a new IndexTargetNode node that takes over the
+ * receiver, arguments, block, and delimiter locations of an existing call
+ * node. The attribute write flag is added to the target's flags, and the call
+ * node itself is freed.
+ */
+static pm_index_target_node_t *
+pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
+    pm_index_target_node_t *index_target = PM_NODE_ALLOC(parser, pm_index_target_node_t);
+
+    pm_index_arguments_check(parser, target->arguments, target->block);
+
+    // Any block that is present must be a block argument for the cast below.
+    assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
+
+    *index_target = (pm_index_target_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_INDEX_TARGET_NODE, FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, target),
+        .receiver = target->receiver,
+        .arguments = target->arguments,
+        .block = (pm_block_argument_node_t *) target->block,
+        .opening_loc = target->opening_loc,
+        .closing_loc = target->closing_loc,
+    };
+
+    // Only the shell of the target is released. Its children now belong to
+    // the new node, so `pm_node_destroy` must not be used here.
+    xfree(target);
+
+    return index_target;
+}
+
+/**
+ * Allocate a CapturePatternNode that pairs a pattern value with the local
+ * variable target it is captured into, recording where the operator sits.
+ */
+static pm_capture_pattern_node_t *
+pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_capture_pattern_node_t *capture = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
+    *capture = (pm_capture_pattern_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CAPTURE_PATTERN_NODE, 0, value, target),
+        .operator_loc = operator_loc,
+        .target = target,
+        .value = value
+    };
+
+    return capture;
+}
+
+/**
+ * Allocate a CaseNode with the given predicate and keyword tokens. The node
+ * starts out with no conditions and no else clause.
+ */
+static pm_case_node_t *
+pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
+    pm_location_t case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword);
+    pm_location_t end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+
+    pm_case_node_t *case_node = PM_NODE_ALLOC(parser, pm_case_node_t);
+    *case_node = (pm_case_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_NODE, 0, case_keyword, end_keyword),
+        .case_keyword_loc = case_keyword_loc,
+        .predicate = predicate,
+        .conditions = { 0 },
+        .else_clause = NULL,
+        .end_keyword_loc = end_keyword_loc
+    };
+
+    return case_node;
+}
+
+/**
+ * Add a `when` clause to a CaseNode and move the end of the case node's
+ * location to the end of that clause.
+ */
+static void
+pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
+    assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
+
+    node->base.location.end = condition->location.end;
+    pm_node_list_append(&node->conditions, condition);
+}
+
+/**
+ * Attach an else clause to a CaseNode and move the end of the case node's
+ * location to the end of that clause.
+ */
+static void
+pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
+    node->base.location.end = else_clause->base.location.end;
+    node->else_clause = else_clause;
+}
+
+/**
+ * Record the `end` keyword on a CaseNode: store the keyword's location and
+ * move the end of the case node's location to the end of that token.
+ */
+static void
+pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
+    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+    node->base.location.end = end_keyword->end;
+}
+
+/**
+ * Allocate a CaseMatchNode with the given predicate and keyword tokens. The
+ * node starts out with no conditions and no else clause.
+ */
+static pm_case_match_node_t *
+pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
+    pm_location_t case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword);
+    pm_location_t end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+
+    pm_case_match_node_t *case_match = PM_NODE_ALLOC(parser, pm_case_match_node_t);
+    *case_match = (pm_case_match_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_CASE_MATCH_NODE, 0, case_keyword, end_keyword),
+        .case_keyword_loc = case_keyword_loc,
+        .predicate = predicate,
+        .conditions = { 0 },
+        .else_clause = NULL,
+        .end_keyword_loc = end_keyword_loc
+    };
+
+    return case_match;
+}
+
+/**
+ * Add an `in` clause to a CaseMatchNode and move the end of the case node's
+ * location to the end of that clause.
+ */
+static void
+pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
+    assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
+
+    node->base.location.end = condition->location.end;
+    pm_node_list_append(&node->conditions, condition);
+}
+
+/**
+ * Attach an else clause to a CaseMatchNode and move the end of the case
+ * node's location to the end of that clause.
+ */
+static void
+pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
+    node->base.location.end = else_clause->base.location.end;
+    node->else_clause = else_clause;
+}
+
+/**
+ * Record the `end` keyword on a CaseMatchNode: store the keyword's location
+ * and move the end of the case node's location to the end of that token.
+ */
+static void
+pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
+    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+    node->base.location.end = end_keyword->end;
+}
+
+/**
+ * Allocate and initialize a new ClassNode node. The locals list is copied by
+ * value into the node, and the class name is interned from the `name` token.
+ * The inheritance operator is optional and recorded as such.
+ */
+static pm_class_node_t *
+pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
+    pm_location_t class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword);
+    pm_location_t inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator);
+    pm_location_t end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+
+    pm_class_node_t *class_node = PM_NODE_ALLOC(parser, pm_class_node_t);
+    *class_node = (pm_class_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_CLASS_NODE, 0, class_keyword, end_keyword),
+        .locals = *locals,
+        .class_keyword_loc = class_keyword_loc,
+        .constant_path = constant_path,
+        .inheritance_operator_loc = inheritance_operator_loc,
+        .superclass = superclass,
+        .body = body,
+        .end_keyword_loc = end_keyword_loc,
+        .name = pm_parser_constant_id_token(parser, name)
+    };
+
+    return class_node;
+}
+
+/**
+ * Allocate a ClassVariableAndWriteNode (`&&=`) from a class variable read node
+ * and the value being assigned. The name and its location are taken from the
+ * read node.
+ */
+static pm_class_variable_and_write_node_t *
+pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_class_variable_and_write_node_t *write = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
+    *write = (pm_class_variable_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_AND_WRITE_NODE, 0, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .operator_loc = operator_loc
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ClassVariableOperatorWriteNode from a class variable read node,
+ * a compound-assignment operator token, and the value being assigned.
+ */
+static pm_class_variable_operator_write_node_t *
+pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_location_t binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_class_variable_operator_write_node_t *write = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
+    *write = (pm_class_variable_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .binary_operator_loc = binary_operator_loc,
+        // The binary operator is interned from the operator token with its
+        // final byte excluded (presumably the trailing `=`).
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ClassVariableOrWriteNode (`||=`) from a class variable read node
+ * and the value being assigned. The name and its location are taken from the
+ * read node.
+ */
+static pm_class_variable_or_write_node_t *
+pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_class_variable_or_write_node_t *write = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
+    *write = (pm_class_variable_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_OR_WRITE_NODE, 0, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .operator_loc = operator_loc
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ClassVariableReadNode from a class variable token, interning the
+ * token's text as the variable name.
+ */
+static pm_class_variable_read_node_t *
+pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_CLASS_VARIABLE);
+
+    pm_class_variable_read_node_t *read = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
+    *read = (pm_class_variable_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_CLASS_VARIABLE_READ_NODE, 0, token),
+        .name = pm_parser_constant_id_token(parser, token)
+    };
+
+    return read;
+}
+
+/**
+ * Return the given flags if the node is an implicit array node on a write
+ * (an array node with no opening location), as in:
+ *
+ *     a = *b
+ *     a = 1, 2, 3
+ *
+ * Otherwise return 0.
+ */
+static inline pm_node_flags_t
+pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
+    if (!PM_NODE_TYPE_P(node, PM_ARRAY_NODE)) return 0;
+
+    const pm_array_node_t *array = (const pm_array_node_t *) node;
+    return (array->opening_loc.start == NULL) ? flags : 0;
+}
+
+/**
+ * Allocate and initialize a new ClassVariableWriteNode node from a
+ * ClassVariableReadNode node. The name and its location are taken from the
+ * read node. The implicit-array flag is set on the new node when the value is
+ * an array node without an opening location.
+ *
+ * The operator token is only read from, so it is accepted as a pointer to
+ * const like the operator parameter of the other write node constructors.
+ */
+static pm_class_variable_write_node_t *
+pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, const pm_token_t *operator, pm_node_t *value) {
+    pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
+    pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+
+    *node = (pm_class_variable_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CLASS_VARIABLE_WRITE_NODE, flags, read_node, value),
+        .name = read_node->name,
+        .name_loc = PM_LOCATION_NODE_VALUE(UP(read_node)),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate a ConstantPathAndWriteNode (`&&=`) from a constant path target and
+ * the value being assigned.
+ */
+static pm_constant_path_and_write_node_t *
+pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_constant_path_and_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
+    *write = (pm_constant_path_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_AND_WRITE_NODE, 0, target, value),
+        .value = value,
+        .operator_loc = operator_loc,
+        .target = target
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ConstantPathOperatorWriteNode from a constant path target, a
+ * compound-assignment operator token, and the value being assigned.
+ */
+static pm_constant_path_operator_write_node_t *
+pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_location_t binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_constant_path_operator_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
+    *write = (pm_constant_path_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, 0, target, value),
+        .value = value,
+        .target = target,
+        .binary_operator_loc = binary_operator_loc,
+        // The binary operator is interned from the operator token with its
+        // final byte excluded (presumably the trailing `=`).
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ConstantPathOrWriteNode (`||=`) from a constant path target and
+ * the value being assigned.
+ */
+static pm_constant_path_or_write_node_t *
+pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_constant_path_or_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
+    *write = (pm_constant_path_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_OR_WRITE_NODE, 0, target, value),
+        .value = value,
+        .operator_loc = operator_loc,
+        .target = target
+    };
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new ConstantPathNode node. The parent may be NULL,
+ * in which case the node is initialized from the delimiter and name tokens;
+ * otherwise it is initialized from the parent node and the name token. The
+ * name is only interned when the name token is a constant token, and is left
+ * unset otherwise.
+ */
+static pm_constant_path_node_t *
+pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
+    pm_assert_value_expression(parser, parent);
+    pm_constant_path_node_t *path = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
+
+    pm_constant_id_t name = (name_token->type == PM_TOKEN_CONSTANT)
+        ? pm_parser_constant_id_token(parser, name_token)
+        : PM_CONSTANT_ID_UNSET;
+
+    // Only the base differs between the two shapes of constant path, so it is
+    // the only field that is selected conditionally.
+    *path = (pm_constant_path_node_t) {
+        .base = (
+            (parent == NULL)
+            ? PM_NODE_INIT_TOKENS(parser, PM_CONSTANT_PATH_NODE, 0, delimiter, name_token)
+            : PM_NODE_INIT_NODE_TOKEN(parser, PM_CONSTANT_PATH_NODE, 0, parent, name_token)
+        ),
+        .parent = parent,
+        .name = name,
+        .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
+        .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
+    };
+
+    return path;
+}
+
+/**
+ * Allocate and initialize a new ConstantPathWriteNode node. The operator
+ * location is optional. The implicit-array flag is set on the new node when
+ * the value is an array node without an opening location.
+ */
+static pm_constant_path_write_node_t *
+pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_constant_path_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
+
+    pm_node_flags_t write_flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+    pm_location_t operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+
+    *write = (pm_constant_path_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_PATH_WRITE_NODE, write_flags, target, value),
+        .value = value,
+        .operator_loc = operator_loc,
+        .target = target
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ConstantAndWriteNode (`&&=`) from a constant read node and the
+ * value being assigned. The name and its location are taken from the read
+ * node.
+ */
+static pm_constant_and_write_node_t *
+pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_constant_and_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
+    *write = (pm_constant_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_AND_WRITE_NODE, 0, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .operator_loc = operator_loc
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ConstantOperatorWriteNode from a constant read node, a
+ * compound-assignment operator token, and the value being assigned.
+ */
+static pm_constant_operator_write_node_t *
+pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_location_t binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_constant_operator_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
+    *write = (pm_constant_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OPERATOR_WRITE_NODE, 0, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .binary_operator_loc = binary_operator_loc,
+        // The binary operator is interned from the operator token with its
+        // final byte excluded (presumably the trailing `=`).
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ConstantOrWriteNode (`||=`) from a constant read node and the
+ * value being assigned. The name and its location are taken from the read
+ * node.
+ */
+static pm_constant_or_write_node_t *
+pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_constant_or_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
+    *write = (pm_constant_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_OR_WRITE_NODE, 0, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .operator_loc = operator_loc
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a ConstantReadNode from a constant token (or a missing token),
+ * interning the token's text as the constant name.
+ */
+static pm_constant_read_node_t *
+pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
+
+    pm_constant_read_node_t *read = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
+    *read = (pm_constant_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_CONSTANT_READ_NODE, 0, name),
+        .name = pm_parser_constant_id_token(parser, name)
+    };
+
+    return read;
+}
+
+/**
+ * Allocate and initialize a new ConstantWriteNode node. The name and its
+ * location are taken from the read node, and the operator location is
+ * optional. The implicit-array flag is set on the new node when the value is
+ * an array node without an opening location.
+ */
+static pm_constant_write_node_t *
+pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_constant_write_node_t *write = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
+
+    pm_node_flags_t write_flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+    pm_location_t operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+
+    *write = (pm_constant_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_CONSTANT_WRITE_NODE, write_flags, target, value),
+        .name_loc = target->base.location,
+        .name = target->name,
+        .value = value,
+        .operator_loc = operator_loc
+    };
+
+    return write;
+}
+
+/**
+ * Check if the receiver of a `def` node is allowed.
+ *
+ * Begin, parentheses, and statements nodes are looked through: the check
+ * recurses into the statements of a begin node, the body of a parentheses
+ * node, and the last statement of a statements node. If the node that is
+ * finally reached is one of the literal node types listed below, a
+ * PM_ERR_SINGLETON_FOR_LITERALS error is added to the parser. Every other node
+ * type is accepted without a diagnostic.
+ */
+static void
+pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_BEGIN_NODE: {
+            const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
+            if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
+            break;
+        }
+        case PM_PARENTHESES_NODE: {
+            const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
+            if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
+            break;
+        }
+        case PM_STATEMENTS_NODE: {
+            const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
+
+            // Only the last statement is checked. Guard against an empty list
+            // so that `size - 1` cannot wrap around and index out of bounds.
+            if (cast->body.size > 0) {
+                pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
+            }
+            break;
+        }
+        case PM_ARRAY_NODE:
+        case PM_FLOAT_NODE:
+        case PM_IMAGINARY_NODE:
+        case PM_INTEGER_NODE:
+        case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
+        case PM_INTERPOLATED_STRING_NODE:
+        case PM_INTERPOLATED_SYMBOL_NODE:
+        case PM_INTERPOLATED_X_STRING_NODE:
+        case PM_RATIONAL_NODE:
+        case PM_REGULAR_EXPRESSION_NODE:
+        case PM_SOURCE_ENCODING_NODE:
+        case PM_SOURCE_FILE_NODE:
+        case PM_SOURCE_LINE_NODE:
+        case PM_STRING_NODE:
+        case PM_SYMBOL_NODE:
+        case PM_X_STRING_NODE:
+            pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
+            break;
+        default:
+            break;
+    }
+}
+
+/**
+ * Allocate and initialize a new DefNode node. If a receiver is given it is
+ * first checked with pm_def_node_receiver_check. When the end keyword is not
+ * provided, the node is initialized from the def keyword and the body;
+ * otherwise it is initialized from the def keyword and the end keyword. The
+ * locals list is copied by value into the node.
+ */
+static pm_def_node_t *
+pm_def_node_create(
+    pm_parser_t *parser,
+    pm_constant_id_t name,
+    const pm_token_t *name_loc,
+    pm_node_t *receiver,
+    pm_parameters_node_t *parameters,
+    pm_node_t *body,
+    pm_constant_id_list_t *locals,
+    const pm_token_t *def_keyword,
+    const pm_token_t *operator,
+    const pm_token_t *lparen,
+    const pm_token_t *rparen,
+    const pm_token_t *equal,
+    const pm_token_t *end_keyword
+) {
+    pm_def_node_t *def = PM_NODE_ALLOC(parser, pm_def_node_t);
+
+    if (receiver != NULL) pm_def_node_receiver_check(parser, receiver);
+
+    // Every token other than the name and the def keyword is optional.
+    pm_location_t operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    pm_location_t lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen);
+    pm_location_t rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen);
+    pm_location_t equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal);
+    pm_location_t end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
+
+    *def = (pm_def_node_t) {
+        .base = (
+            (end_keyword->type != PM_TOKEN_NOT_PROVIDED)
+            ? PM_NODE_INIT_TOKENS(parser, PM_DEF_NODE, 0, def_keyword, end_keyword)
+            : PM_NODE_INIT_TOKEN_NODE(parser, PM_DEF_NODE, 0, def_keyword, body)
+        ),
+        .name = name,
+        .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
+        .receiver = receiver,
+        .parameters = parameters,
+        .body = body,
+        .locals = *locals,
+        .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
+        .operator_loc = operator_loc,
+        .lparen_loc = lparen_loc,
+        .rparen_loc = rparen_loc,
+        .equal_loc = equal_loc,
+        .end_keyword_loc = end_keyword_loc
+    };
+
+    return def;
+}
+
+/**
+ * Allocate and initialize a new DefinedNode node. Both parentheses are
+ * optional. When the closing parenthesis is not provided, the node is
+ * initialized from the keyword and the value; otherwise it is initialized from
+ * the keyword and the closing parenthesis.
+ */
+static pm_defined_node_t *
+pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
+    pm_location_t lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen);
+    pm_location_t rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen);
+    pm_location_t keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+
+    pm_defined_node_t *defined = PM_NODE_ALLOC(parser, pm_defined_node_t);
+    *defined = (pm_defined_node_t) {
+        .base = (
+            (rparen->type != PM_TOKEN_NOT_PROVIDED)
+            ? PM_NODE_INIT_TOKENS(parser, PM_DEFINED_NODE, 0, keyword, rparen)
+            : PM_NODE_INIT_TOKEN_NODE(parser, PM_DEFINED_NODE, 0, keyword, value)
+        ),
+        .keyword_loc = keyword_loc,
+        .lparen_loc = lparen_loc,
+        .value = value,
+        .rparen_loc = rparen_loc
+    };
+
+    return defined;
+}
+
+/**
+ * Allocate and initialize a new ElseNode node. The end keyword is optional.
+ * When it is not provided and there are statements, the node is initialized
+ * from the else keyword and the statements; in every other case it is
+ * initialized from the else keyword and the end keyword.
+ */
+static pm_else_node_t *
+pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
+    pm_location_t else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword);
+    pm_location_t end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
+
+    pm_else_node_t *else_node = PM_NODE_ALLOC(parser, pm_else_node_t);
+    *else_node = (pm_else_node_t) {
+        .base = (
+            ((end_keyword->type != PM_TOKEN_NOT_PROVIDED) || (statements == NULL))
+            ? PM_NODE_INIT_TOKENS(parser, PM_ELSE_NODE, 0, else_keyword, end_keyword)
+            : PM_NODE_INIT_TOKEN_NODE(parser, PM_ELSE_NODE, 0, else_keyword, statements)
+        ),
+        .else_keyword_loc = else_keyword_loc,
+        .statements = statements,
+        .end_keyword_loc = end_keyword_loc
+    };
+
+    return else_node;
+}
+
+/**
+ * Allocate an EmbeddedStatementsNode holding the given statements between
+ * the given opening and closing tokens.
+ */
+static pm_embedded_statements_node_t *
+pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
+    pm_location_t opening_loc = PM_LOCATION_TOKEN_VALUE(opening);
+    pm_location_t closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+
+    pm_embedded_statements_node_t *embedded = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
+    *embedded = (pm_embedded_statements_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_EMBEDDED_STATEMENTS_NODE, 0, opening, closing),
+        .statements = statements,
+        .opening_loc = opening_loc,
+        .closing_loc = closing_loc
+    };
+
+    return embedded;
+}
+
+/**
+ * Allocate an EmbeddedVariableNode from the operator token and the variable
+ * node that follows it.
+ */
+static pm_embedded_variable_node_t *
+pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
+    pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    pm_embedded_variable_node_t *embedded = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
+    *embedded = (pm_embedded_variable_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_EMBEDDED_VARIABLE_NODE, 0, operator, variable),
+        .variable = variable,
+        .operator_loc = operator_loc
+    };
+
+    return embedded;
+}
+
+/**
+ * Allocate and initialize a new EnsureNode node holding the given statements
+ * between the ensure keyword and the end keyword.
+ */
+static pm_ensure_node_t *
+pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
+    pm_location_t ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword);
+    pm_location_t end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+
+    pm_ensure_node_t *ensure = PM_NODE_ALLOC(parser, pm_ensure_node_t);
+    *ensure = (pm_ensure_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_ENSURE_NODE, 0, ensure_keyword, end_keyword),
+        .statements = statements,
+        .ensure_keyword_loc = ensure_keyword_loc,
+        .end_keyword_loc = end_keyword_loc
+    };
+
+    return ensure;
+}
+
+/**
+ * Allocate a FalseNode from a `false` keyword token. The node is flagged as a
+ * static literal.
+ */
+static pm_false_node_t *
+pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_FALSE);
+
+    pm_false_node_t *false_node = PM_NODE_ALLOC(parser, pm_false_node_t);
+    *false_node = (pm_false_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_FALSE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+    };
+
+    return false_node;
+}
+
+/**
+ * Allocate and initialize a new find pattern node from a list of nodes. The
+ * first node in the list must be a splat node and becomes the left side. When
+ * the list holds more than one node, the last node must also be a splat node
+ * and becomes the right side; when the list holds only the one node, a
+ * zero-width missing node placed at the end of the left splat stands in for
+ * the right side. Every node strictly between the first and the last is
+ * appended to the list of required patterns.
+ */
+static pm_find_pattern_node_t *
+pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
+    pm_find_pattern_node_t *pattern = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
+
+    pm_node_t *first = nodes->nodes[0];
+    assert(PM_NODE_TYPE_P(first, PM_SPLAT_NODE));
+    pm_splat_node_t *left_splat = (pm_splat_node_t *) first;
+
+    pm_node_t *last;
+
+    if (nodes->size != 1) {
+        last = nodes->nodes[nodes->size - 1];
+        assert(PM_NODE_TYPE_P(last, PM_SPLAT_NODE));
+    } else {
+        last = UP(pm_missing_node_create(parser, first->location.end, first->location.end));
+    }
+
+#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
+    // FindPatternNode#right is typed as SplatNode in this configuration, so a
+    // potential MissingNode is swapped for the left splat. The resulting AST
+    // will be ignored anyway, but this file still needs to compile.
+    pm_splat_node_t *right_field = PM_NODE_TYPE_P(last, PM_SPLAT_NODE) ? (pm_splat_node_t *) last : left_splat;
+#else
+    pm_node_t *right_field = last;
+#endif
+    *pattern = (pm_find_pattern_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_FIND_PATTERN_NODE, 0, first, last),
+        .constant = NULL,
+        .left = left_splat,
+        .right = right_field,
+        .requireds = { 0 },
+        .opening_loc = { 0 },
+        .closing_loc = { 0 }
+    };
+
+    // Copy the interior pointers over one at a time. This could be made more
+    // efficient by resizing the node list so that it only covers 1...-1.
+    size_t last_index = nodes->size - 1;
+    for (size_t index = 1; index < last_index; index++) {
+        pm_node_list_append(&pattern->requireds, nodes->nodes[index]);
+    }
+
+    return pattern;
+}
+
+/**
+ * Parse the value of a double, add appropriate errors if there is an issue, and
+ * return the value that should be saved on the PM_FLOAT_NODE node.
+ *
+ * The token's bytes are copied into a scratch buffer with every underscore
+ * removed and every `.` replaced by the current locale's decimal point, and
+ * the buffer is then handed to strtod. An empty token yields 0.0. If strtod
+ * does not consume the whole buffer, or fails with anything other than ERANGE,
+ * a PM_ERR_FLOAT_PARSE error is added and 0.0 is returned. If the value
+ * overflows to infinity, a PM_WARN_FLOAT_OUT_OF_RANGE warning is added and
+ * positive or negative HUGE_VAL is returned.
+ */
+static double
+pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
+    ptrdiff_t diff = token->end - token->start;
+    if (diff <= 0) return 0.0;
+
+    // First, get a buffer large enough for the content and a terminator.
+    size_t source_length = (size_t) diff;
+    char *buffer = xmalloc(sizeof(char) * (source_length + 1));
+    if (buffer == NULL) {
+        fputs("[pm_double_parse] Failed to allocate memory", stderr);
+        abort();
+    }
+
+    // Next, copy the content over in a single pass, dropping underscores and
+    // normalizing the decimal point for locale-specific options as we go.
+    // Compacting while copying handles any number of adjacent underscores,
+    // which shifting the buffer in place while advancing the index does not.
+    char decimal_point = *localeconv()->decimal_point;
+    size_t length = 0;
+
+    for (size_t index = 0; index < source_length; index++) {
+        char byte = (char) token->start[index];
+        if (byte == '_') continue;
+        buffer[length++] = (byte == '.') ? decimal_point : byte;
+    }
+
+    // Null-terminate the buffer so that strtod cannot read off the end.
+    buffer[length] = '\0';
+
+    // Now, call strtod to parse the value. Note that CRuby has their own
+    // version of strtod which avoids locales. We're okay using the locale-aware
+    // version because we've already validated through the parser that the token
+    // is in a valid format.
+    errno = 0;
+    char *eptr;
+    double value = strtod(buffer, &eptr);
+
+    // This should never happen, because we've already checked that the token
+    // is in a valid format. However it's good to be safe.
+    if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
+        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
+        xfree((void *) buffer);
+        return 0.0;
+    }
+
+    // If errno is set, then it should only be ERANGE. At this point we need to
+    // check if it's infinity (it should be).
+    if (errno == ERANGE && PRISM_ISINF(value)) {
+        int warn_width;
+        const char *ellipsis;
+
+        // The warning prints the raw bytes of the token, so the width has to
+        // be measured against the raw token length and not against the length
+        // of the buffer, which is shorter when underscores were removed.
+        if (source_length > 20) {
+            warn_width = 20;
+            ellipsis = "...";
+        } else {
+            warn_width = (int) source_length;
+            ellipsis = "";
+        }
+
+        pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
+        value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
+    }
+
+    // Finally we can free the buffer and return the value.
+    xfree((void *) buffer);
+    return value;
+}
+
+/**
+ * Allocate a FloatNode from a float token. The node is flagged as a static
+ * literal and its value is obtained from pm_double_parse.
+ */
+static pm_float_node_t *
+pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_FLOAT);
+
+    pm_float_node_t *float_node = PM_NODE_ALLOC(parser, pm_float_node_t);
+    *float_node = (pm_float_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_FLOAT_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+        .value = pm_double_parse(parser, token)
+    };
+
+    return float_node;
+}
+
+/**
+ * Allocate and initialize a new ImaginaryNode node from a FLOAT_IMAGINARY
+ * token. The numeric child is a FloatNode built from the same token with its
+ * final byte left off.
+ */
+static pm_imaginary_node_t *
+pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
+
+    // A synthetic float token covering everything but the last byte.
+    pm_token_t float_token = {
+        .type = PM_TOKEN_FLOAT,
+        .start = token->start,
+        .end = token->end - 1
+    };
+
+    pm_imaginary_node_t *imaginary = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
+    *imaginary = (pm_imaginary_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+        .numeric = UP(pm_float_node_create(parser, &float_token))
+    };
+
+    return imaginary;
+}
+
+/**
+ * Allocate and initialize a new RationalNode node from a FLOAT_RATIONAL token.
+ *
+ * The final byte of the token is left out, then leading and trailing `0`
+ * bytes are trimmed from what remains. If only a single byte is left, the
+ * node is returned with a zero-initialized numerator and a denominator of 1.
+ * Otherwise the numerator is parsed from the remaining bytes with the decimal
+ * point removed, and the denominator is parsed from a `1` followed by one `0`
+ * for every non-underscore byte after the decimal point. The two integers are
+ * then passed to pm_integers_reduce.
+ */
+static pm_rational_node_t *
+pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
+
+    pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
+    *node = (pm_rational_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, token),
+        .numerator = { 0 },
+        .denominator = { 0 }
+    };
+
+    const uint8_t *start = token->start;
+    const uint8_t *end = token->end - 1; // leave out the final byte, the "r"
+
+    while (start < end && *start == '0') start++; // skip leading zeros: 0.1 -> .1
+    while (end > start && end[-1] == '0') end--; // skip trailing zeros: 1.0 -> 1.
+
+    size_t length = (size_t) (end - start);
+    if (length == 1) { // a single byte is left after trimming
+        node->denominator.value = 1;
+        return node;
+    }
+
+    const uint8_t *point = memchr(start, '.', length);
+    assert(point && "should have a decimal point");
+
+    uint8_t *digits = xmalloc(length); // scratch space, reused for both integers
+    if (digits == NULL) {
+        fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
+        abort();
+    }
+
+    memcpy(digits, start, (unsigned long) (point - start)); // bytes before the point
+    memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1)); // bytes after the point
+    pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1); // length - 1 because the point was dropped
+
+    size_t fract_length = 0; // counts the point itself plus each non-underscore byte after it
+    for (const uint8_t *fract = point; fract < end; ++fract) {
+        if (*fract != '_') ++fract_length;
+    }
+    digits[0] = '1'; // overwrite the scratch space with a 1 followed by zeros
+    if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
+    pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
+    xfree(digits);
+
+    pm_integers_reduce(&node->numerator, &node->denominator);
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ImaginaryNode node from a FLOAT_RATIONAL_IMAGINARY
+ * token. Its numeric child is created by pm_float_node_rational_create from the same bytes minus the last one.
+ */
+static pm_imaginary_node_t *
+pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
+
+    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
+    *node = (pm_imaginary_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+        .numeric = UP(pm_float_node_rational_create(parser, &((pm_token_t) {
+            .type = PM_TOKEN_FLOAT_RATIONAL,
+            .start = token->start,
+            .end = token->end - 1 // drop the trailing byte (presumably the `i` suffix)
+        })))
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ForNode node from its index, collection, statements, and the for/in/do/end keyword tokens.
+ */
+static pm_for_node_t *
+pm_for_node_create(
+    pm_parser_t *parser,
+    pm_node_t *index,
+    pm_node_t *collection,
+    pm_statements_node_t *statements,
+    const pm_token_t *for_keyword,
+    const pm_token_t *in_keyword,
+    const pm_token_t *do_keyword,
+    const pm_token_t *end_keyword
+) {
+    pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
+
+    *node = (pm_for_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_FOR_NODE, 0, for_keyword, end_keyword), // base built from the for and end keyword tokens
+        .index = index,
+        .collection = collection,
+        .statements = statements,
+        .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
+        .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), // NOTE(review): the only keyword using the OPTIONAL macro, presumably because `do` may be absent; confirm
+        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ForwardingArgumentsNode node from a UDOT_DOT_DOT token (asserted). Only the base node is populated.
+ */
+static pm_forwarding_arguments_node_t *
+pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
+    pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
+
+    *node = (pm_forwarding_arguments_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_ARGUMENTS_NODE, 0, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ForwardingParameterNode node from a UDOT_DOT_DOT token (asserted). Only the base node is populated.
+ */
+static pm_forwarding_parameter_node_t *
+pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
+    pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
+
+    *node = (pm_forwarding_parameter_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_PARAMETER_NODE, 0, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ForwardingSuperNode node from a `super`
+ * keyword token. If the arguments carry a block, it is attached to the node
+ * and the base is initialized from both the token and the block; otherwise
+ * the base is initialized from the token alone.
+ */
+static pm_forwarding_super_node_t *
+pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
+    assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
+    assert(token->type == PM_TOKEN_KEYWORD_SUPER);
+    pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
+
+    // Any block is expected to be a BlockNode (asserted above); a NULL block
+    // stays NULL through the cast.
+    pm_block_node_t *block = (pm_block_node_t *) arguments->block;
+
+    *node = (pm_forwarding_super_node_t) { .block = block };
+    if (block != NULL) {
+        node->base = PM_NODE_INIT_TOKEN_NODE(parser, PM_FORWARDING_SUPER_NODE, 0, token, block);
+    } else {
+        node->base = PM_NODE_INIT_TOKEN(parser, PM_FORWARDING_SUPER_NODE, 0, token);
+    }
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new hash pattern node from an opening and closing
+ * token. The node is created with no constant, no elements and no rest.
+ */
+static pm_hash_pattern_node_t *
+pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
+
+    *node = (pm_hash_pattern_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_HASH_PATTERN_NODE, 0, opening, closing),
+        .constant = NULL,
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
+        .elements = { 0 },
+        .rest = NULL
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new hash pattern node from a list of elements and an optional rest node, without delimiters.
+ */
+static pm_hash_pattern_node_t *
+pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
+    pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
+
+    const uint8_t *start;
+    const uint8_t *end;
+
+    if (elements->size == 0) {
+        // With no elements, the rest node alone determines the bounds.
+        assert(rest != NULL);
+        start = rest->location.start;
+        end = rest->location.end;
+    } else {
+        start = elements->nodes[0]->location.start;
+        end = elements->nodes[elements->size - 1]->location.end;
+        if (rest != NULL) {
+            start = MIN(rest->location.start, start);
+            end = MAX(rest->location.end, end);
+        }
+    }
+
+    *node = (pm_hash_pattern_node_t) {
+        .base = PM_NODE_INIT(parser, PM_HASH_PATTERN_NODE, 0, start, end),
+        .constant = NULL,
+        .elements = { 0 },
+        .rest = rest,
+        .opening_loc = { 0 },
+        .closing_loc = { 0 }
+    };
+
+    pm_node_list_concat(&node->elements, elements);
+    return node;
+}
+
+/**
+ * Retrieve the constant id naming the global variable that a write to the given target node will assign.
+ */
+static pm_constant_id_t
+pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
+    if (PM_NODE_TYPE_P(target, PM_GLOBAL_VARIABLE_READ_NODE)) {
+        return ((pm_global_variable_read_node_t *) target)->name;
+    }
+    if (PM_NODE_TYPE_P(target, PM_BACK_REFERENCE_READ_NODE)) {
+        return ((pm_back_reference_read_node_t *) target)->name;
+    }
+    if (PM_NODE_TYPE_P(target, PM_NUMBERED_REFERENCE_READ_NODE)) {
+        // Only reachable after a syntax error, but the node still needs a name, so use its source text.
+        return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+    }
+
+    assert(false && "unreachable");
+    return (pm_constant_id_t) -1;
+}
+
+/**
+ * Allocate and initialize a new GlobalVariableAndWriteNode node. The operator must be an AMPERSAND_AMPERSAND_EQUAL token (asserted); the name is derived from target via pm_global_variable_write_name.
+ */
+static pm_global_variable_and_write_node_t *
+pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+    pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
+
+    *node = (pm_global_variable_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
+        .name = pm_global_variable_write_name(parser, target),
+        .name_loc = target->location, // the name location is the whole target node
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new GlobalVariableOperatorWriteNode node. The name is derived from target via pm_global_variable_write_name.
+ */
+static pm_global_variable_operator_write_node_t *
+pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
+
+    *node = (pm_global_variable_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
+        .name = pm_global_variable_write_name(parser, target),
+        .name_loc = target->location, // the name location is the whole target node
+        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value,
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) // operator text minus its last byte (presumably the trailing `=`)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new GlobalVariableOrWriteNode node. The operator must be a PIPE_PIPE_EQUAL token (asserted); the name is derived from target via pm_global_variable_write_name.
+ */
+static pm_global_variable_or_write_node_t *
+pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+    pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
+
+    *node = (pm_global_variable_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
+        .name = pm_global_variable_write_name(parser, target),
+        .name_loc = target->location, // the name location is the whole target node
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new GlobalVariableReadNode node from a name token. The name is the constant id produced from that token.
+ */
+static pm_global_variable_read_node_t *
+pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
+
+    *node = (pm_global_variable_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0, name),
+        .name = pm_parser_constant_id_token(parser, name)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new synthesized GlobalVariableReadNode node from an existing constant id rather than a token. The base is initialized without any token or location argument.
+ */
+static pm_global_variable_read_node_t *
+pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
+    pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
+
+    *node = (pm_global_variable_read_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_READ_NODE, 0),
+        .name = name
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new GlobalVariableWriteNode node. The node flags come from pm_implicit_array_write_flags applied to value; the name is derived from target via pm_global_variable_write_name.
+ */
+static pm_global_variable_write_node_t *
+pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
+    pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+
+    *node = (pm_global_variable_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, flags, target, value),
+        .name = pm_global_variable_write_name(parser, target),
+        .name_loc = PM_LOCATION_NODE_VALUE(target),
+        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), // NOTE(review): OPTIONAL macro, so presumably the operator token may be absent; confirm
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new synthesized GlobalVariableWriteNode node from an existing constant id and a value. Both the name and operator locations are set to the null location value.
+ */
+static pm_global_variable_write_node_t *
+pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
+    pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
+
+    *node = (pm_global_variable_write_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_GLOBAL_VARIABLE_WRITE_NODE, 0),
+        .name = name,
+        .name_loc = PM_LOCATION_NULL_VALUE(parser),
+        .operator_loc = PM_LOCATION_NULL_VALUE(parser),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new HashNode node from its opening token. The node starts empty, flagged as a static literal, with a null closing location.
+ */
+static pm_hash_node_t *
+pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
+    assert(opening != NULL);
+    pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
+
+    *node = (pm_hash_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_HASH_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_NULL_VALUE(parser), // filled in later by pm_hash_node_closing_loc_set
+        .elements = { 0 }
+    };
+
+    return node;
+}
+
+/**
+ * Append a new element to a hash node, clearing the hash's static literal flag unless the element qualifies.
+ */
+static inline void
+pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
+    pm_node_list_append(&hash->elements, element);
+
+    if (PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) {
+        pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
+        bool excluded_key = PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) || PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) || PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
+
+        if (!excluded_key && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL) && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL)) {
+            return;
+        }
+    }
+
+    // Any other element means the hash as a whole is no longer a static literal.
+    pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
+}
+
+static inline void
+pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
+    hash->base.location.end = token->end; // the hash node now ends where the closing token ends
+    hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token); // record the closing token's location
+}
+
+/**
+ * Allocate a new IfNode node. The node starts at the if keyword and ends at
+ * the first of these that is available: the end keyword, the subsequent
+ * clause, the statements (when non-empty), or the predicate.
+ */
+static pm_if_node_t *
+pm_if_node_create(pm_parser_t *parser,
+    const pm_token_t *if_keyword,
+    pm_node_t *predicate,
+    const pm_token_t *then_keyword,
+    pm_statements_node_t *statements,
+    pm_node_t *subsequent,
+    const pm_token_t *end_keyword
+) {
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+    pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
+
+    // Candidates are checked in priority order; the predicate is the fallback
+    // when nothing else is present.
+    const uint8_t *end = (
+        (end_keyword->type != PM_TOKEN_NOT_PROVIDED) ? end_keyword->end :
+        (subsequent != NULL) ? subsequent->location.end :
+        (pm_statements_node_body_length(statements) != 0) ? statements->base.location.end :
+        predicate->location.end
+    );
+
+    *node = (pm_if_node_t) {
+        .base = PM_NODE_INIT(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, if_keyword->start, end),
+        .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
+        .predicate = predicate,
+        .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
+        .statements = statements,
+        .subsequent = subsequent,
+        .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize new IfNode node in the modifier form. The statement is wrapped in a freshly created statements node, and the base is built from statement and predicate.
+ */
+static pm_if_node_t *
+pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+    pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
+
+    pm_statements_node_t *statements = pm_statements_node_create(parser);
+    pm_statements_node_body_append(parser, statements, statement, true);
+
+    *node = (pm_if_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
+        .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
+        .predicate = predicate,
+        .then_keyword_loc = { 0 }, // no then keyword is recorded in this form
+        .statements = statements,
+        .subsequent = NULL,
+        .end_keyword_loc = { 0 } // no end keyword is recorded in this form
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize an if node from a ternary expression. Each branch is wrapped in its own statements node; the false branch goes into an else node created from the colon token.
+ */
+static pm_if_node_t *
+pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
+    pm_assert_value_expression(parser, predicate);
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+
+    pm_statements_node_t *if_statements = pm_statements_node_create(parser);
+    pm_statements_node_body_append(parser, if_statements, true_expression, true);
+
+    pm_statements_node_t *else_statements = pm_statements_node_create(parser);
+    pm_statements_node_body_append(parser, else_statements, false_expression, true);
+
+    pm_token_t end_keyword = not_provided(parser); // the else node is created with a not-provided end keyword
+    pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
+
+    pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
+
+    *node = (pm_if_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_IF_NODE, PM_NODE_FLAG_NEWLINE, predicate, false_expression),
+        .if_keyword_loc = { 0 }, // no if keyword is recorded for a ternary
+        .predicate = predicate,
+        .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark), // the question mark is stored as the then keyword
+        .statements = if_statements,
+        .subsequent = UP(else_node),
+        .end_keyword_loc = { 0 }
+    };
+
+    return node;
+
+}
+
+static inline void
+pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
+    node->base.location.end = keyword->end; // the if node now ends where the keyword ends
+    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); // record the keyword's location as the end keyword
+}
+
+static inline void
+pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
+    node->base.location.end = keyword->end; // the else node now ends where the keyword ends
+    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); // record the keyword's location as the end keyword
+}
+
+/**
+ * Allocate and initialize a new ImplicitNode node wrapping the given value node. The base is initialized from that same value node.
+ */
+static pm_implicit_node_t *
+pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
+    pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
+
+    *node = (pm_implicit_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_IMPLICIT_NODE, 0, value),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ImplicitRestNode node from a COMMA token (asserted). Only the base node is populated.
+ */
+static pm_implicit_rest_node_t *
+pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_COMMA);
+
+    pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
+
+    *node = (pm_implicit_rest_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_IMPLICIT_REST_NODE, 0, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new IntegerNode node from an INTEGER token. The base flag is stored in the node flags and selects the pm_integer_base_t used to parse the token's bytes.
+ */
+static pm_integer_node_t *
+pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_INTEGER);
+    pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
+
+    *node = (pm_integer_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_INTEGER_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
+        .value = { 0 }
+    };
+
+    pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; // decimal unless the flag says otherwise
+    switch (base) {
+        case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+        case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+        case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+        case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+        default: assert(false && "unreachable"); break;
+    }
+
+    pm_integer_parse(&node->value, integer_base, token->start, token->end);
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ImaginaryNode node from an INTEGER_IMAGINARY
+ * token. Its numeric child is created by pm_integer_node_create from the same bytes minus the last one.
+ */
+static pm_imaginary_node_t *
+pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
+
+    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
+    *node = (pm_imaginary_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+        .numeric = UP(pm_integer_node_create(parser, base, &((pm_token_t) {
+            .type = PM_TOKEN_INTEGER,
+            .start = token->start,
+            .end = token->end - 1 // drop the trailing byte (presumably the `i` suffix)
+        })))
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new RationalNode node from an INTEGER_RATIONAL
+ * token. The numerator is parsed from the token minus its last byte; the denominator value is 1.
+ */
+static pm_rational_node_t *
+pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
+
+    pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
+    *node = (pm_rational_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_RATIONAL_NODE, base | PM_NODE_FLAG_STATIC_LITERAL, token),
+        .numerator = { 0 },
+        .denominator = { .value = 1, 0 }
+    };
+
+    pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; // decimal unless the flag says otherwise
+    switch (base) {
+        case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+        case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+        case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+        case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+        default: assert(false && "unreachable"); break;
+    }
+
+    pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1); // excludes the final byte (presumably the `r` suffix)
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ImaginaryNode node from an
+ * INTEGER_RATIONAL_IMAGINARY token. Its numeric child is created by pm_integer_node_rational_create from the same bytes minus the last one.
+ */
+static pm_imaginary_node_t *
+pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
+
+    pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
+    *node = (pm_imaginary_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_IMAGINARY_NODE, PM_NODE_FLAG_STATIC_LITERAL, token),
+        .numeric = UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
+            .type = PM_TOKEN_INTEGER_RATIONAL,
+            .start = token->start,
+            .end = token->end - 1 // drop the trailing byte (presumably the `i` suffix)
+        })))
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new InNode node. The node starts at the in
+ * keyword and ends at the statements, else the then keyword, else the pattern.
+ */
+static pm_in_node_t *
+pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
+    pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
+
+    // Candidates are checked in priority order; the pattern is the fallback
+    // when there are neither statements nor a then keyword.
+    const uint8_t *end = (
+        (statements != NULL) ? statements->base.location.end :
+        (then_keyword->type != PM_TOKEN_NOT_PROVIDED) ? then_keyword->end :
+        pattern->location.end
+    );
+
+    *node = (pm_in_node_t) {
+        .base = PM_NODE_INIT(parser, PM_IN_NODE, 0, in_keyword->start, end),
+        .pattern = pattern,
+        .statements = statements,
+        .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
+        .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new InstanceVariableAndWriteNode node. The operator must be an AMPERSAND_AMPERSAND_EQUAL token (asserted); name and name location are copied from the target read node.
+ */
+static pm_instance_variable_and_write_node_t *
+pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+    pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
+
+    *node = (pm_instance_variable_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_AND_WRITE_NODE, 0, target, value),
+        .name = target->name,
+        .name_loc = target->base.location, // the name location is the whole target node
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new InstanceVariableOperatorWriteNode node. Name and name location are copied from the target read node.
+ */
+static pm_instance_variable_operator_write_node_t *
+pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
+
+    *node = (pm_instance_variable_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
+        .name = target->name,
+        .name_loc = target->base.location, // the name location is the whole target node
+        .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value,
+        .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) // operator text minus its last byte (presumably the trailing `=`)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new InstanceVariableOrWriteNode node. The operator must be a PIPE_PIPE_EQUAL token (asserted); name and name location are copied from the target read node.
+ */
+static pm_instance_variable_or_write_node_t *
+pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+    pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
+
+    *node = (pm_instance_variable_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_OR_WRITE_NODE, 0, target, value),
+        .name = target->name,
+        .name_loc = target->base.location, // the name location is the whole target node
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new InstanceVariableReadNode node from an INSTANCE_VARIABLE token (asserted). The name is the constant id produced from that token.
+ */
+static pm_instance_variable_read_node_t *
+pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
+    pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
+
+    *node = (pm_instance_variable_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_INSTANCE_VARIABLE_READ_NODE, 0, token),
+        .name = pm_parser_constant_id_token(parser, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new InstanceVariableWriteNode node from an InstanceVariableRead
+ * node, copying its name. The node flags come from pm_implicit_array_write_flags applied to value.
+ */
+static pm_instance_variable_write_node_t *
+pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
+    pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
+    pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+
+    *node = (pm_instance_variable_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_INSTANCE_VARIABLE_WRITE_NODE, flags, read_node, value),
+        .name = read_node->name,
+        .name_loc = PM_LOCATION_NODE_VALUE(read_node),
+        .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), // NOTE(review): OPTIONAL macro, so presumably the operator token may be absent; confirm
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Append a part into a list of string parts. Importantly this handles nested
+ * interpolated strings by not necessarily removing the marker for static
+ * literals. The static literal flag is only ever cleared on node, never set.
+ */
+static void
+pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
+    switch (PM_NODE_TYPE(part)) {
+        case PM_STRING_NODE:
+            pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN); // plain string parts are marked static literal and frozen; node keeps its flag
+            break;
+        case PM_EMBEDDED_STATEMENTS_NODE: {
+            pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
+            pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL; // the sole embedded statement, or NULL
+
+            if (embedded == NULL) {
+                // If there are no statements or more than one statement, then
+                // we lose the static literal flag.
+                pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
+            } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
+                // If the embedded statement is a string, then we can keep the
+                // static literal flag and mark the string as frozen.
+                pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
+            } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
+                // If the embedded statement is an interpolated string and it's
+                // a static literal, then we can keep the static literal flag.
+            } else {
+                // Otherwise we lose the static literal flag.
+                pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
+            }
+
+            break;
+        }
+        case PM_EMBEDDED_VARIABLE_NODE:
+            pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL); // an embedded variable always clears the flag
+            break;
+        default:
+            assert(false && "unexpected node type");
+            break;
+    }
+
+    pm_node_list_append(parts, part); // the part is appended whichever branch ran
+}
+
+/**
+ * Allocate and initialize a new InterpolatedRegularExpressionNode node from its opening token. The node starts with no parts and flagged as a static literal.
+ */
+static pm_interpolated_regular_expression_node_t *
+pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
+    pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
+
+    *node = (pm_interpolated_regular_expression_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening),
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(opening), // NOTE(review): seeded from the opening token; pm_interpolated_regular_expression_node_closing_set overwrites it
+        .parts = { 0 }
+    };
+
+    return node;
+}
+
+/**
+ * Append a part to an InterpolatedRegularExpressionNode node, growing the
+ * node's location as needed so that it covers the new part.
+ */
+static inline void
+pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
+    node->base.location.start = MIN(node->base.location.start, part->location.start);
+    node->base.location.end = MAX(node->base.location.end, part->location.end);
+
+    pm_interpolated_node_append(UP(node), &node->parts, part);
+}
+
+static inline void
+pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
+    node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); // record the closing token's location
+    node->base.location.end = closing->end; // the node now ends where the closing token ends
+    pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing)); // add the flags computed from the closing token
+}
+
+/**
+ * Append a part to an InterpolatedStringNode node.
+ *
+ * This has some somewhat complicated semantics, because we need to update
+ * multiple flags that have somewhat confusing interactions.
+ *
+ * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
+ * single static literal string that can be pushed onto the stack on its own.
+ * Note that this doesn't necessarily mean that the string will be frozen or
+ * not; the instructions in CRuby will be either putobject or putstring,
+ * depending on the combination of `--enable-frozen-string-literal`,
+ * `# frozen_string_literal: true`, and whether or not there is interpolation.
+ *
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN indicates that the string should be
+ * explicitly frozen. This will only happen if the string is comprised entirely
+ * of string parts that are themselves static literals and frozen.
+ *
+ * PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE indicates that the string should
+ * be explicitly marked as mutable. This will happen from
+ * `--disable-frozen-string-literal` or `# frozen_string_literal: false`. This
+ * is necessary to indicate that the string should be left up to the runtime,
+ * which could potentially use a chilled string otherwise.
+ */
+static inline void
+pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+// Strip the static literal flag together with both the frozen and the mutable
+// flags from the given node.
+#define CLEAR_FLAGS(node) \
+    node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
+
+// Set the mutable flag and strip the frozen flag from the given node. Note
+// that this expansion already ends in a semicolon.
+#define MUTABLE_FLAGS(node) \
+    node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
+
+    // If this is the first part and the node has no opening location, then the
+    // node starts where the part starts.
+    if (node->parts.size == 0 && node->opening_loc.start == NULL) {
+        node->base.location.start = part->location.start;
+    }
+
+    // The end of the node only ever moves forward.
+    node->base.location.end = MAX(node->base.location.end, part->location.end);
+
+    switch (PM_NODE_TYPE(part)) {
+        case PM_STRING_NODE:
+            // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
+            // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
+            // as long as this interpolation only consists of other string literals.
+            if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
+                pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+            }
+            // The part itself is always marked as a frozen static literal,
+            // with its mutable flag removed.
+            part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
+            break;
+        case PM_INTERPOLATED_STRING_NODE:
+            if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
+                // If the string that we're concatenating is a static literal,
+                // then we can keep the static literal flag for this string.
+            } else {
+                // Otherwise, we lose the static literal flag here and we should
+                // also clear the mutability flags.
+                CLEAR_FLAGS(node);
+            }
+            break;
+        case PM_EMBEDDED_STATEMENTS_NODE: {
+            pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
+            // Only a body consisting of exactly one statement is inspected
+            // further; anything else is treated as NULL below.
+            pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
+
+            if (embedded == NULL) {
+                // If we're embedding multiple statements or no statements, then
+                // the string is no longer a static literal.
+                CLEAR_FLAGS(node);
+            } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
+                // If the embedded statement is a string, then we can mark that
+                // string as frozen and static literal, and keep the static
+                // literal status of this string. If this string is still a
+                // static literal, it is additionally marked as mutable and its
+                // frozen flag is removed.
+                embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
+
+                if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+                    MUTABLE_FLAGS(node);
+                }
+            } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
+                // If the embedded statement is an interpolated string, but that
+                // string is marked as static literal, then we can keep our
+                // static literal status for this string. If this string is
+                // still a static literal, it is marked as mutable and its
+                // frozen flag is removed.
+                if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
+                    MUTABLE_FLAGS(node);
+                }
+            } else {
+                // In all other cases, we lose the static literal flag here and
+                // both the frozen and mutable flags are cleared.
+                CLEAR_FLAGS(node);
+            }
+
+            break;
+        }
+        case PM_EMBEDDED_VARIABLE_NODE:
+            // Embedded variables clear static literal, which means we also
+            // should clear the mutability flags.
+            CLEAR_FLAGS(node);
+            break;
+        case PM_X_STRING_NODE:
+        case PM_INTERPOLATED_X_STRING_NODE:
+        case PM_SYMBOL_NODE:
+        case PM_INTERPOLATED_SYMBOL_NODE:
+            // These will only happen in error cases. But we want to handle it
+            // here so that we don't fail the assertion.
+            CLEAR_FLAGS(node);
+            break;
+        default:
+            assert(false && "unexpected node type");
+            break;
+    }
+
+    // Regardless of its type, the part is always added to the list of parts.
+    pm_node_list_append(&node->parts, part);
+
+#undef CLEAR_FLAGS
+#undef MUTABLE_FLAGS
+}
+
+/**
+ * Allocate a new InterpolatedStringNode node and append each of the given
+ * parts (if any) to it. The node starts out flagged as a static literal, plus
+ * the mutable or frozen flag when the parser's frozen_string_literal setting
+ * is explicitly disabled or enabled.
+ */
+static pm_interpolated_string_node_t *
+pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
+    pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
+
+    // Any other value of the setting leaves both mutability flags unset.
+    pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
+    if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED) {
+        flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
+    } else if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED) {
+        flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
+    }
+
+    *node = (pm_interpolated_string_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_STRING_NODE, flags, opening, closing),
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .parts = { 0 }
+    };
+
+    // Without a list of parts there is nothing left to append.
+    if (parts == NULL) return node;
+
+    // Go through the append function so that the flags and the location of
+    // the node are updated for every part.
+    pm_node_t *part;
+    PM_NODE_LIST_FOREACH(parts, index, part) {
+        pm_interpolated_string_node_append(node, part);
+    }
+
+    return node;
+}
+
+/**
+ * Record the closing token on the given InterpolatedStringNode node and move
+ * the end of the node to the end of that token.
+ */
+static void
+pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
+    node->base.location.end = closing->end;
+    node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
+}
+
+/**
+ * Append a part to an InterpolatedSymbolNode node, growing the location of the
+ * node so that it covers the part.
+ */
+static void
+pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
+    // When there is no opening location, the first part decides where the
+    // node starts.
+    const bool first_part = (node->parts.size == 0);
+    if (first_part && node->opening_loc.start == NULL) {
+        node->base.location.start = part->location.start;
+    }
+
+    pm_interpolated_node_append(UP(node), &node->parts, part);
+
+    // The end of the node only ever moves forward.
+    if (part->location.end > node->base.location.end) {
+        node->base.location.end = part->location.end;
+    }
+}
+
+/**
+ * Record the closing token on the given InterpolatedSymbolNode node and move
+ * the end of the node to the end of that token.
+ */
+static void
+pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
+    node->base.location.end = closing->end;
+    node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
+}
+
+/**
+ * Allocate a new InterpolatedSymbolNode node, flagged as a static literal, and
+ * append each of the given parts (if any) to it.
+ */
+static pm_interpolated_symbol_node_t *
+pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
+    pm_interpolated_symbol_node_t *symbol = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
+
+    *symbol = (pm_interpolated_symbol_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .parts = { 0 }
+    };
+
+    // Without a list of parts there is nothing left to append.
+    if (parts == NULL) return symbol;
+
+    // Go through the append function so that the location of the node is
+    // updated for every part.
+    pm_node_t *part;
+    PM_NODE_LIST_FOREACH(parts, index, part) {
+        pm_interpolated_symbol_node_append(symbol, part);
+    }
+
+    return symbol;
+}
+
+/**
+ * Allocate and initialize a new InterpolatedXStringNode node with an empty
+ * list of parts.
+ */
+static pm_interpolated_x_string_node_t *
+pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_interpolated_x_string_node_t *xstring = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
+
+    // Either delimiter may be absent, hence the optional locations.
+    const pm_location_t opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening);
+    const pm_location_t closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
+
+    *xstring = (pm_interpolated_x_string_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_INTERPOLATED_X_STRING_NODE, 0, opening, closing),
+        .opening_loc = opening_loc,
+        .closing_loc = closing_loc,
+        .parts = { 0 }
+    };
+
+    return xstring;
+}
+
+/**
+ * Append a part to an InterpolatedXStringNode node. The end of the node is set
+ * to the end of the appended part.
+ */
+static inline void
+pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
+    pm_node_list_t *parts = &node->parts;
+    pm_interpolated_node_append(UP(node), parts, part);
+
+    node->base.location.end = part->location.end;
+}
+
+/**
+ * Record the closing token on the given InterpolatedXStringNode node and move
+ * the end of the node to the end of that token.
+ */
+static inline void
+pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
+    node->base.location.end = closing->end;
+    node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
+}
+
+/**
+ * Allocate and initialize a new ItLocalVariableReadNode node, which is a read
+ * of the implicit `it` local variable. The node is initialized from the given
+ * name token.
+ */
+static pm_it_local_variable_read_node_t *
+pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    pm_it_local_variable_read_node_t *read = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
+
+    *read = (pm_it_local_variable_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_IT_LOCAL_VARIABLE_READ_NODE, 0, name)
+    };
+
+    return read;
+}
+
+/**
+ * Allocate a new ItParametersNode node, initialized from the given opening and
+ * closing tokens.
+ */
+static pm_it_parameters_node_t *
+pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_it_parameters_node_t *parameters = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
+
+    *parameters = (pm_it_parameters_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_IT_PARAMETERS_NODE, 0, opening, closing)
+    };
+
+    return parameters;
+}
+
+/**
+ * Allocate and initialize a new KeywordHashNode node with no elements. The
+ * node starts out with the SYMBOL_KEYS flag set; appending elements may later
+ * remove it.
+ */
+static pm_keyword_hash_node_t *
+pm_keyword_hash_node_create(pm_parser_t *parser) {
+    pm_keyword_hash_node_t *hash = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
+
+    *hash = (pm_keyword_hash_node_t) {
+        .base = PM_NODE_INIT_UNSET(parser, PM_KEYWORD_HASH_NODE, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS),
+        .elements = { 0 }
+    };
+
+    return hash;
+}
+
+/**
+ * Append an element to a KeywordHashNode node, updating both the SYMBOL_KEYS
+ * flag and the location of the hash.
+ */
+static void
+pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
+    // The SYMBOL_KEYS flag only survives while every element is an AssocNode
+    // whose key is a SymbolNode.
+    const bool symbol_key = PM_NODE_TYPE_P(element, PM_ASSOC_NODE) && PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE);
+    if (!symbol_key) {
+        pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
+    }
+
+    // The start is only set once (while it is still unset), whereas the end
+    // always follows the most recently appended element.
+    if (hash->base.location.start == NULL) {
+        hash->base.location.start = element->location.start;
+    }
+    hash->base.location.end = element->location.end;
+
+    pm_node_list_append(&hash->elements, element);
+}
+
+/**
+ * Allocate a new RequiredKeywordParameterNode node from the given name token.
+ */
+static pm_required_keyword_parameter_node_t *
+pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
+
+    // The constant is built from the token without its final byte (presumably
+    // the trailing `:` of the label), whereas name_loc keeps the whole token.
+    const pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name->start, name->end - 1);
+    const pm_location_t name_loc = PM_LOCATION_TOKEN_VALUE(name);
+
+    *node = (pm_required_keyword_parameter_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_KEYWORD_PARAMETER_NODE, 0, name),
+        .name = name_id,
+        .name_loc = name_loc,
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new OptionalKeywordParameterNode node from the
+ * given name token and default value.
+ */
+static pm_optional_keyword_parameter_node_t *
+pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
+    pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
+
+    // The constant is built from the token without its final byte (presumably
+    // the trailing `:` of the label), whereas name_loc keeps the whole token.
+    const pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name->start, name->end - 1);
+    const pm_location_t name_loc = PM_LOCATION_TOKEN_VALUE(name);
+
+    *node = (pm_optional_keyword_parameter_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_KEYWORD_PARAMETER_NODE, 0, name, value),
+        .name = name_id,
+        .name_loc = name_loc,
+        .value = value
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new KeywordRestParameterNode node. The name token
+ * may be of type PM_TOKEN_NOT_PROVIDED, in which case the node is initialized
+ * from the operator token alone.
+ */
+static pm_keyword_rest_parameter_node_t *
+pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
+    pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
+    const bool named = (name->type != PM_TOKEN_NOT_PROVIDED);
+
+    *node = (pm_keyword_rest_parameter_node_t) {
+        .base = (
+            named
+            ? PM_NODE_INIT_TOKENS(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator, name)
+            : PM_NODE_INIT_TOKEN(parser, PM_KEYWORD_REST_PARAMETER_NODE, 0, operator)
+        ),
+        .name = pm_parser_optional_constant_id_token(parser, name),
+        .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new LambdaNode node. The node is initialized from
+ * the operator and closing tokens, and the list of locals is copied by value
+ * into the node.
+ */
+static pm_lambda_node_t *
+pm_lambda_node_create(
+    pm_parser_t *parser,
+    pm_constant_id_list_t *locals,
+    const pm_token_t *operator,
+    const pm_token_t *opening,
+    const pm_token_t *closing,
+    pm_node_t *parameters,
+    pm_node_t *body
+) {
+    pm_lambda_node_t *lambda = PM_NODE_ALLOC(parser, pm_lambda_node_t);
+
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+    const pm_location_t opening_loc = PM_LOCATION_TOKEN_VALUE(opening);
+    const pm_location_t closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+
+    *lambda = (pm_lambda_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_LAMBDA_NODE, 0, operator, closing),
+        .locals = *locals,
+        .operator_loc = operator_loc,
+        .opening_loc = opening_loc,
+        .closing_loc = closing_loc,
+        .parameters = parameters,
+        .body = body
+    };
+
+    return lambda;
+}
+
+/**
+ * Allocate a new LocalVariableAndWriteNode node (`&&=`). The target must be a
+ * local variable read, an `it` local variable read, or a call node; its
+ * location becomes the name location of the new node.
+ */
+static pm_local_variable_and_write_node_t *
+pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
+    assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
+    assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+
+    pm_local_variable_and_write_node_t *write = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *write = (pm_local_variable_and_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_AND_WRITE_NODE, 0, target, value),
+        .name_loc = target->location,
+        .operator_loc = operator_loc,
+        .value = value,
+        .name = name,
+        .depth = depth
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a new LocalVariableOperatorWriteNode node. The location of the
+ * target becomes the name location of the new node.
+ */
+static pm_local_variable_operator_write_node_t *
+pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
+    pm_local_variable_operator_write_node_t *write = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
+
+    // The binary operator constant is built from the operator token without
+    // its final byte (presumably the trailing `=`), whereas the location
+    // covers the whole token.
+    const pm_location_t binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+    const pm_constant_id_t binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1);
+
+    *write = (pm_local_variable_operator_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, 0, target, value),
+        .name_loc = target->location,
+        .binary_operator_loc = binary_operator_loc,
+        .value = value,
+        .name = name,
+        .binary_operator = binary_operator,
+        .depth = depth
+    };
+
+    return write;
+}
+
+/**
+ * Allocate a new LocalVariableOrWriteNode node (`||=`). The target must be a
+ * local variable read, an `it` local variable read, or a call node; its
+ * location becomes the name location of the new node.
+ */
+static pm_local_variable_or_write_node_t *
+pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
+    assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
+    assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
+
+    pm_local_variable_or_write_node_t *write = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *write = (pm_local_variable_or_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_LOCAL_VARIABLE_OR_WRITE_NODE, 0, target, value),
+        .name_loc = target->location,
+        .operator_loc = operator_loc,
+        .value = value,
+        .name = name,
+        .depth = depth
+    };
+
+    return write;
+}
+
+/**
+ * Allocate and initialize a new LocalVariableReadNode node for a name whose
+ * constant id is already known. Unless `missing` is set, the read is also
+ * recorded against the locals of the scope found at the given depth.
+ */
+static pm_local_variable_read_node_t *
+pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
+    if (!missing) {
+        pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
+    }
+
+    pm_local_variable_read_node_t *read = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
+
+    *read = (pm_local_variable_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_READ_NODE, 0, name),
+        .name = name_id,
+        .depth = depth
+    };
+
+    return read;
+}
+
+/**
+ * Allocate a new LocalVariableReadNode node for the local named by the given
+ * token. The constant id is derived from the token, and the variable is not
+ * treated as missing.
+ */
+static pm_local_variable_read_node_t *
+pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
+    return pm_local_variable_read_node_create_constant_id(parser, name, pm_parser_constant_id_token(parser, name), depth, false);
+}
+
+/**
+ * Allocate a new LocalVariableReadNode node for a local variable that is
+ * missing, which only happens when there is a syntax error. The constant id is
+ * derived from the token and the `missing` argument is passed as true.
+ */
+static pm_local_variable_read_node_t *
+pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
+    return pm_local_variable_read_node_create_constant_id(parser, name, pm_parser_constant_id_token(parser, name), depth, true);
+}
+
+/**
+ * Allocate a new LocalVariableWriteNode node. The node is initialized from the
+ * name location and the value, and its flags are computed from the value. The
+ * operator location is optional.
+ */
+static pm_local_variable_write_node_t *
+pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
+    pm_local_variable_write_node_t *write = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
+
+    const pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+    const pm_location_t operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+
+    *write = (pm_local_variable_write_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_LOCAL_VARIABLE_WRITE_NODE, flags, name_loc, value),
+        .name = name,
+        .depth = depth,
+        .value = value,
+        .name_loc = *name_loc,
+        .operator_loc = operator_loc
+    };
+
+    return write;
+}
+
+/**
+ * Returns true if the bytes in the range [start, end) are exactly `it`.
+ */
+static inline bool
+pm_token_is_it(const uint8_t *start, const uint8_t *end) {
+    // Check the length first so that no byte outside of the range is read.
+    if ((end - start) != 2) return false;
+
+    return start[0] == 'i' && start[1] == 't';
+}
+
+/**
+ * Returns true if the bytes in the range [start, end) have the form of a
+ * numbered parameter, i.e., an underscore followed by a single decimal digit
+ * other than zero (/^_[1-9]$/).
+ */
+static inline bool
+pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
+    // Check the length first so that no byte outside of the range is read.
+    if ((end - start) != 2) return false;
+    if (start[0] != '_') return false;
+    if (start[1] == '0') return false;
+
+    return pm_char_is_decimal_digit(start[1]);
+}
+
+/**
+ * Add an error to the parser if the given bounds comprise a numbered
+ * parameter. Otherwise this function does nothing.
+ */
+static inline void
+pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    if (!pm_token_is_numbered_parameter(start, end)) return;
+
+    PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
+}
+
+/**
+ * Allocate a new LocalVariableTargetNode node with the given name and depth.
+ * An error is added to the parser first if the location comprises a numbered
+ * parameter.
+ */
+static pm_local_variable_target_node_t *
+pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
+    pm_refute_numbered_parameter(parser, location->start, location->end);
+
+    pm_local_variable_target_node_t *target = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
+
+    *target = (pm_local_variable_target_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_LOCAL_VARIABLE_TARGET_NODE, 0, location),
+        .name = name,
+        .depth = depth
+    };
+
+    return target;
+}
+
+/**
+ * Allocate a new MatchPredicateNode node from a value, a pattern, and the
+ * operator token between them. The value is first checked with
+ * pm_assert_value_expression.
+ */
+static pm_match_predicate_node_t *
+pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
+    pm_assert_value_expression(parser, value);
+
+    pm_match_predicate_node_t *match = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *match = (pm_match_predicate_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_MATCH_PREDICATE_NODE, 0, value, pattern),
+        .value = value,
+        .pattern = pattern,
+        .operator_loc = operator_loc
+    };
+
+    return match;
+}
+
+/**
+ * Allocate a new MatchRequiredNode node from a value, a pattern, and the
+ * operator token between them. The value is first checked with
+ * pm_assert_value_expression.
+ */
+static pm_match_required_node_t *
+pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
+    pm_assert_value_expression(parser, value);
+
+    pm_match_required_node_t *match = PM_NODE_ALLOC(parser, pm_match_required_node_t);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *match = (pm_match_required_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_MATCH_REQUIRED_NODE, 0, value, pattern),
+        .value = value,
+        .pattern = pattern,
+        .operator_loc = operator_loc
+    };
+
+    return match;
+}
+
+/**
+ * Allocate a new MatchWriteNode node that wraps the given call. The node is
+ * initialized from the call and starts out with an empty list of targets.
+ */
+static pm_match_write_node_t *
+pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
+    pm_match_write_node_t *match = PM_NODE_ALLOC(parser, pm_match_write_node_t);
+
+    *match = (pm_match_write_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_MATCH_WRITE_NODE, 0, call),
+        .call = call,
+        .targets = { 0 }
+    };
+
+    return match;
+}
+
+/**
+ * Allocate and initialize a new ModuleNode node. The node is initialized from
+ * the `module` and `end` keyword tokens. The list of locals may be NULL, in
+ * which case the node receives an empty list.
+ */
+static pm_module_node_t *
+pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
+    pm_module_node_t *module = PM_NODE_ALLOC(parser, pm_module_node_t);
+
+    // Start from an empty list and only copy the given one when it exists.
+    pm_constant_id_list_t module_locals = { .ids = NULL, .size = 0, .capacity = 0 };
+    if (locals != NULL) {
+        module_locals = *locals;
+    }
+
+    *module = (pm_module_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_MODULE_NODE, 0, module_keyword, end_keyword),
+        .locals = module_locals,
+        .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
+        .constant_path = constant_path,
+        .body = body,
+        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
+        .name = pm_parser_constant_id_token(parser, name)
+    };
+
+    return module;
+}
+
+/**
+ * Allocate a new, empty MultiTargetNode node. The location of the node is left
+ * unset here; it is filled in as targets are appended or as the parentheses
+ * are set.
+ */
+static pm_multi_target_node_t *
+pm_multi_target_node_create(pm_parser_t *parser) {
+    pm_multi_target_node_t *multi_target = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
+
+    *multi_target = (pm_multi_target_node_t) {
+        .base = PM_NODE_INIT_UNSET(parser, PM_MULTI_TARGET_NODE, 0),
+        .lefts = { 0 },
+        .rest = NULL,
+        .rights = { 0 },
+        .lparen_loc = { 0 },
+        .rparen_loc = { 0 }
+    };
+
+    return multi_target;
+}
+
+/**
+ * Append a target to a MultiTargetNode node.
+ *
+ * Until a rest has been seen, a splat or an implicit rest becomes the rest and
+ * any other target is added to the lefts. Once a rest has been seen, every
+ * target is added to the rights, and a further splat or implicit rest also
+ * adds an error to the parser. In all cases the location of the node is
+ * widened so that it covers the target.
+ */
+static void
+pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
+    const bool splat = PM_NODE_TYPE_P(target, PM_SPLAT_NODE);
+    const bool implicit_rest = !splat && PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE);
+
+    if (node->rest == NULL) {
+        if (splat || implicit_rest) {
+            node->rest = target;
+        } else {
+            pm_node_list_append(&node->lefts, target);
+        }
+    } else {
+        // There can only be one rest, so a second one is reported as an error
+        // but still kept among the rights.
+        if (splat) {
+            pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
+        } else if (implicit_rest) {
+            PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
+        }
+
+        pm_node_list_append(&node->rights, target);
+    }
+
+    const uint8_t *target_start = target->location.start;
+    const uint8_t *target_end = target->location.end;
+
+    if (node->base.location.start == NULL || target_start < node->base.location.start) {
+        node->base.location.start = target_start;
+    }
+
+    if (node->base.location.end == NULL || target_end > node->base.location.end) {
+        node->base.location.end = target_end;
+    }
+}
+
+/**
+ * Record the opening parenthesis of a MultiTargetNode node and move the start
+ * of the node to the start of that token.
+ */
+static void
+pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
+    node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
+    node->base.location.start = lparen->start;
+}
+
+/**
+ * Record the closing parenthesis of a MultiTargetNode node and move the end of
+ * the node to the end of that token.
+ */
+static void
+pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
+    node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
+    node->base.location.end = rparen->end;
+}
+
+/**
+ * Allocate and initialize a new MultiWriteNode node from a MultiTargetNode
+ * node, an operator token, and a value. The contents of the target are moved
+ * into the new node and the target itself is freed, so the caller must not
+ * use the target afterwards.
+ */
+static pm_multi_write_node_t *
+pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
+    pm_multi_write_node_t *write = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
+
+    const pm_node_flags_t flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *write = (pm_multi_write_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_MULTI_WRITE_NODE, flags, target, value),
+        .lefts = target->lefts,
+        .rest = target->rest,
+        .rights = target->rights,
+        .lparen_loc = target->lparen_loc,
+        .rparen_loc = target->rparen_loc,
+        .operator_loc = operator_loc,
+        .value = value
+    };
+
+    // Only the shell of the target is released. pm_node_destroy is
+    // deliberately not called because the new node now holds everything that
+    // was copied out of the target above.
+    xfree(target);
+
+    return write;
+}
+
+/**
+ * Allocate a new NextNode node from the `next` keyword token and its optional
+ * arguments. When arguments are given, the node is initialized from both the
+ * keyword and the arguments; otherwise from the keyword alone.
+ */
+static pm_next_node_t *
+pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
+    assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
+
+    pm_next_node_t *next = PM_NODE_ALLOC(parser, pm_next_node_t);
+
+    *next = (pm_next_node_t) {
+        .base = (
+            (arguments != NULL)
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_NEXT_NODE, 0, keyword, arguments)
+            : PM_NODE_INIT_TOKEN(parser, PM_NEXT_NODE, 0, keyword)
+        ),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .arguments = arguments
+    };
+
+    return next;
+}
+
+/**
+ * Allocate a new NilNode node from the `nil` keyword token. The node is
+ * flagged as a static literal.
+ */
+static pm_nil_node_t *
+pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_NIL);
+
+    pm_nil_node_t *nil = PM_NODE_ALLOC(parser, pm_nil_node_t);
+    *nil = (pm_nil_node_t) { .base = PM_NODE_INIT_TOKEN(parser, PM_NIL_NODE, PM_NODE_FLAG_STATIC_LITERAL, token) };
+
+    return nil;
+}
+
+/**
+ * Allocate a new NoKeywordsParameterNode node from a `**` operator token
+ * followed by the `nil` keyword token.
+ */
+static pm_no_keywords_parameter_node_t *
+pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
+    assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
+    assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
+
+    pm_no_keywords_parameter_node_t *parameter = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
+
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+    const pm_location_t keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+
+    *parameter = (pm_no_keywords_parameter_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_NO_KEYWORDS_PARAMETER_NODE, 0, operator, keyword),
+        .operator_loc = operator_loc,
+        .keyword_loc = keyword_loc
+    };
+
+    return parameter;
+}
+
+/**
+ * Allocate a new NumberedParametersNode node at the given location, recording
+ * the given maximum.
+ */
+static pm_numbered_parameters_node_t *
+pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
+    pm_numbered_parameters_node_t *parameters = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
+
+    *parameters = (pm_numbered_parameters_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_PARAMETERS_NODE, 0, location),
+        .maximum = maximum
+    };
+
+    return parameters;
+}
+
+/**
+ * The maximum numbered reference value is defined as the maximum value that an
+ * integer can hold minus 1 bit for CRuby instruction sequence operand tagging.
+ */
+#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
+
+/**
+ * Parse the decimal number represented by the range of bytes that follows the
+ * first byte of the given token. Returns 0 if the number fails to parse or if
+ * the number is greater than the maximum value representable by a numbered
+ * reference. This function assumes that the range of bytes has already been
+ * validated to contain only decimal digits.
+ *
+ * A failure to parse adds an error to the parser, while a number that is out
+ * of range adds a warning.
+ */
+static uint32_t
+pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
+    // Skip the first byte of the token; only the remainder holds the digits.
+    const uint8_t *start = token->start + 1;
+    const uint8_t *end = token->end;
+
+    ptrdiff_t diff = end - start;
+    assert(diff > 0);
+#if PTRDIFF_MAX > SIZE_MAX
+    assert(diff < (ptrdiff_t) SIZE_MAX);
+#endif
+    size_t length = (size_t) diff;
+
+    // strtoul requires a NUL-terminated string, so the digits are copied into
+    // a temporary buffer.
+    char *digits = xcalloc(length + 1, sizeof(char));
+    memcpy(digits, start, length);
+    digits[length] = '\0';
+
+    char *endptr;
+    errno = 0;
+    unsigned long value = strtoul(digits, &endptr, 10);
+
+    // Capture the outcome of strtoul right away. Both reporting an error and
+    // freeing memory are allowed to overwrite errno, and endptr points into
+    // the buffer that is about to be freed.
+    const bool out_of_range = (errno == ERANGE);
+    const bool malformed = (digits == endptr) || (*endptr != '\0');
+
+    xfree(digits);
+
+    if (malformed) {
+        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+        value = 0;
+    }
+
+    if (out_of_range || (value > NTH_REF_MAX)) {
+        // The warning shows the whole token, including its first byte.
+        PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
+        value = 0;
+    }
+
+    return (uint32_t) value;
+}
+
+#undef NTH_REF_MAX
+
+/**
+ * Allocate a new NumberedReferenceReadNode node from the given numbered
+ * reference token. The number is parsed out of the token.
+ */
+static pm_numbered_reference_read_node_t *
+pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
+    assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
+
+    pm_numbered_reference_read_node_t *read = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
+
+    *read = (pm_numbered_reference_read_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_NUMBERED_REFERENCE_READ_NODE, 0, name),
+        .number = pm_numbered_reference_read_node_number(parser, name)
+    };
+
+    return read;
+}
+
+/**
+ * Allocate and initialize a new OptionalParameterNode node from the name
+ * token, the operator token, and the default value.
+ */
+static pm_optional_parameter_node_t *
+pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
+    pm_optional_parameter_node_t *parameter = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
+
+    const pm_location_t name_loc = PM_LOCATION_TOKEN_VALUE(name);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *parameter = (pm_optional_parameter_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_OPTIONAL_PARAMETER_NODE, 0, name, value),
+        .name = pm_parser_constant_id_token(parser, name),
+        .name_loc = name_loc,
+        .operator_loc = operator_loc,
+        .value = value
+    };
+
+    return parameter;
+}
+
+/**
+ * Allocate a new OrNode node from its two operands and the operator token
+ * between them. The left operand is first checked with
+ * pm_assert_value_expression.
+ */
+static pm_or_node_t *
+pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
+    pm_assert_value_expression(parser, left);
+
+    pm_or_node_t *or_node = PM_NODE_ALLOC(parser, pm_or_node_t);
+    const pm_location_t operator_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+    *or_node = (pm_or_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_OR_NODE, 0, left, right),
+        .left = left,
+        .right = right,
+        .operator_loc = operator_loc
+    };
+
+    return or_node;
+}
+
+/**
+ * Allocate and initialize a new, empty ParametersNode node. The base is
+ * initialized with PM_NODE_INIT_UNSET, the rest, keyword rest and block
+ * parameters start out as NULL, and every parameter list starts out
+ * zero-initialized. Parameters are added afterwards through the
+ * pm_parameters_node_* append/set helpers.
+ */
+static pm_parameters_node_t *
+pm_parameters_node_create(pm_parser_t *parser) {
+    pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
+
+    *node = (pm_parameters_node_t) {
+        .base = PM_NODE_INIT_UNSET(parser, PM_PARAMETERS_NODE, 0),
+        .rest = NULL,
+        .keyword_rest = NULL,
+        .block = NULL,
+        .requireds = { 0 },
+        .optionals = { 0 },
+        .posts = { 0 },
+        .keywords = { 0 }
+    };
+
+    return node;
+}
+
+/**
+ * Widen the location of the parameters node so that it covers the given
+ * parameter. A bound that is still NULL is taken directly from the parameter;
+ * otherwise the start can only move earlier and the end can only move later.
+ */
+static void
+pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
+    pm_location_t *bounds = &params->base.location;
+    const uint8_t *param_start = param->location.start;
+    const uint8_t *param_end = param->location.end;
+
+    if (bounds->start == NULL || param_start < bounds->start) {
+        bounds->start = param_start;
+    }
+
+    if (bounds->end == NULL || param_end > bounds->end) {
+        bounds->end = param_end;
+    }
+}
+
+/**
+ * Append a required parameter to a ParametersNode node, first widening the
+ * location of the parameters node to include the new parameter.
+ */
+static void
+pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
+    pm_parameters_node_location_set(params, param);
+    pm_node_list_append(&params->requireds, param);
+}
+
+/**
+ * Append an optional parameter to a ParametersNode node, first widening the
+ * location of the parameters node to include the new parameter.
+ */
+static void
+pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
+    pm_parameters_node_location_set(params, UP(param));
+    pm_node_list_append(&params->optionals, UP(param));
+}
+
+/**
+ * Append a parameter to the posts list of a ParametersNode node, first
+ * widening the location of the parameters node to include the new parameter.
+ */
+static void
+pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
+    pm_parameters_node_location_set(params, param);
+    pm_node_list_append(&params->posts, param);
+}
+
+/**
+ * Set the rest parameter on a ParametersNode node, first widening the
+ * location of the parameters node to include it. Unlike the keyword rest and
+ * block setters, this does not assert that the slot was previously empty; any
+ * existing rest parameter is overwritten.
+ */
+static void
+pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
+    pm_parameters_node_location_set(params, param);
+    params->rest = param;
+}
+
+/**
+ * Append a keyword parameter to a ParametersNode node, first widening the
+ * location of the parameters node to include the new parameter.
+ */
+static void
+pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
+    pm_parameters_node_location_set(params, param);
+    pm_node_list_append(&params->keywords, param);
+}
+
+/**
+ * Set the keyword rest parameter on a ParametersNode node, widening the
+ * location of the parameters node to include it. Asserts that no keyword rest
+ * parameter has been set yet.
+ */
+static void
+pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
+    assert(params->keyword_rest == NULL);
+    pm_parameters_node_location_set(params, param);
+    params->keyword_rest = param;
+}
+
+/**
+ * Set the block parameter on a ParametersNode node, widening the location of
+ * the parameters node to include it. Asserts that no block parameter has been
+ * set yet.
+ */
+static void
+pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
+    assert(params->block == NULL);
+    pm_parameters_node_location_set(params, UP(param));
+    params->block = param;
+}
+
+/**
+ * Allocate and initialize a new ProgramNode node. The base is initialized
+ * from the statements node, and the list of locals is copied by value out of
+ * the given pointer into the node.
+ */
+static pm_program_node_t *
+pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
+    pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
+
+    *node = (pm_program_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_PROGRAM_NODE, 0, statements),
+        .locals = *locals,
+        .statements = statements
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ParenthesesNode node. The base is initialized
+ * from the opening and closing tokens with the caller-supplied flags, and the
+ * locations of both tokens are recorded alongside the body.
+ */
+static pm_parentheses_node_t *
+pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
+    pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
+
+    *node = (pm_parentheses_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_PARENTHESES_NODE, flags, opening, closing),
+        .body = body,
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new PinnedExpressionNode node. The base is
+ * initialized from the operator token and the closing parenthesis token, and
+ * the locations of the operator and both parentheses are recorded alongside
+ * the pinned expression.
+ */
+static pm_pinned_expression_node_t *
+pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
+    pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
+
+    *node = (pm_pinned_expression_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_PINNED_EXPRESSION_NODE, 0, operator, rparen),
+        .expression = expression,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
+        .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new PinnedVariableNode node. The base is
+ * initialized from the operator token and the variable node, and the location
+ * of the operator token is recorded alongside the pinned variable.
+ */
+static pm_pinned_variable_node_t *
+pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
+    pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
+
+    *node = (pm_pinned_variable_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_PINNED_VARIABLE_NODE, 0, operator, variable),
+        .variable = variable,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new PostExecutionNode node. The base is
+ * initialized from the keyword and closing tokens, and the locations of the
+ * keyword, opening and closing tokens are recorded alongside the statements.
+ */
+static pm_post_execution_node_t *
+pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
+    pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
+
+    *node = (pm_post_execution_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_POST_EXECUTION_NODE, 0, keyword, closing),
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new PreExecutionNode node. The base is
+ * initialized from the keyword and closing tokens, and the locations of the
+ * keyword, opening and closing tokens are recorded alongside the statements.
+ */
+static pm_pre_execution_node_t *
+pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
+    pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
+
+    *node = (pm_pre_execution_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_PRE_EXECUTION_NODE, 0, keyword, closing),
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new RangeNode node. Either bound may be NULL, in
+ * which case the corresponding edge of the node's location falls back to the
+ * operator token. Both bounds are passed through pm_assert_value_expression
+ * before the node is allocated.
+ */
+static pm_range_node_t *
+pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
+    pm_assert_value_expression(parser, left);
+    pm_assert_value_expression(parser, right);
+
+    pm_range_node_t *range = PM_NODE_ALLOC(parser, pm_range_node_t);
+
+    // The range excludes its end if the operator is `...`.
+    const bool exclude_end = (operator->type == PM_TOKEN_DOT_DOT_DOT) || (operator->type == PM_TOKEN_UDOT_DOT_DOT);
+
+    // The range is a static literal (i.e., can be compiled with a putobject
+    // in CRuby) if both bounds are implicit nil, explicit nil, or integers.
+    const bool static_literal =
+        (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
+        (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE));
+
+    pm_node_flags_t flags = 0;
+    if (exclude_end) flags |= PM_RANGE_FLAGS_EXCLUDE_END;
+    if (static_literal) flags |= PM_NODE_FLAG_STATIC_LITERAL;
+
+    // A missing bound means the node starts/ends at the operator itself.
+    const uint8_t *start = (left != NULL) ? left->location.start : operator->start;
+    const uint8_t *end = (right != NULL) ? right->location.end : operator->end;
+
+    *range = (pm_range_node_t) {
+        .base = PM_NODE_INIT(parser, PM_RANGE_NODE, flags, start, end),
+        .left = left,
+        .right = right,
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    return range;
+}
+
+/**
+ * Allocate and initialize a new RedoNode node from the given token, which is
+ * asserted to be the `redo` keyword. The node carries no fields beyond its
+ * base.
+ */
+static pm_redo_node_t *
+pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_REDO);
+    pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
+
+    *node = (pm_redo_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_REDO_NODE, 0, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new RegularExpressionNode node with the given
+ * unescaped string. The node's flags are the flags computed from the closing
+ * token by pm_regular_expression_flags_create, combined with
+ * PM_NODE_FLAG_STATIC_LITERAL. The unescaped string is copied by value into
+ * the node.
+ */
+static pm_regular_expression_node_t *
+pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
+    pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
+    pm_node_flags_t flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL;
+
+    *node = (pm_regular_expression_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_REGULAR_EXPRESSION_NODE, flags, opening, closing),
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .content_loc = PM_LOCATION_TOKEN_VALUE(content),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = *unescaped
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new RegularExpressionNode node whose unescaped
+ * string is PM_STRING_EMPTY.
+ */
+static inline pm_regular_expression_node_t *
+pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
+}
+
+/**
+ * Allocate and initialize a new RequiredParameterNode node. The base is
+ * initialized from the given token, and the parameter name is interned from
+ * the same token through pm_parser_constant_id_token.
+ */
+static pm_required_parameter_node_t *
+pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
+
+    *node = (pm_required_parameter_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_REQUIRED_PARAMETER_NODE, 0, token),
+        .name = pm_parser_constant_id_token(parser, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new RescueModifierNode node. The base is
+ * initialized from the guarded expression and the rescue expression, and the
+ * location of the keyword token is recorded between them.
+ */
+static pm_rescue_modifier_node_t *
+pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
+    pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
+
+    *node = (pm_rescue_modifier_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_RESCUE_MODIFIER_NODE, 0, expression, rescue_expression),
+        .expression = expression,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .rescue_expression = rescue_expression
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new RescueNode node from the given keyword token.
+ * Only the base and the keyword location are populated here; the operator and
+ * then-keyword locations, reference, statements, subsequent clause and
+ * exception list all start out empty and are filled in afterwards through the
+ * pm_rescue_node_* setters.
+ */
+static pm_rescue_node_t *
+pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
+    pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
+
+    *node = (pm_rescue_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_RESCUE_NODE, 0, keyword),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .operator_loc = { 0 },
+        .then_keyword_loc = { 0 },
+        .reference = NULL,
+        .statements = NULL,
+        .subsequent = NULL,
+        .exceptions = { 0 }
+    };
+
+    return node;
+}
+
+/**
+ * Set the operator location of a rescue node from the given token. The
+ * location is produced by PM_OPTIONAL_LOCATION_TOKEN_VALUE (presumably an
+ * empty location when the token was not provided -- confirm against the macro
+ * definition). The location of the node itself is left untouched.
+ */
+static inline void
+pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
+    const pm_location_t operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+    node->operator_loc = operator_loc;
+}
+
+/**
+ * Set the reference of a rescue node, and unconditionally move the end of the
+ * node's location to the end of the reference. The reference must not be
+ * NULL, since its location is read.
+ */
+static void
+pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
+    node->reference = reference;
+    node->base.location.end = reference->location.end;
+}
+
+/**
+ * Set the statements of a rescue node. The end of the node's location is only
+ * moved to the end of the statements when pm_statements_node_body_length
+ * reports a non-empty body.
+ */
+static void
+pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
+    node->statements = statements;
+
+    // An empty body contributes nothing to the extent of the rescue clause.
+    if (pm_statements_node_body_length(statements) == 0) return;
+
+    node->base.location.end = statements->base.location.end;
+}
+
+/**
+ * Set the subsequent rescue clause of a rescue node, and unconditionally move
+ * the end of the node's location to the end of the subsequent clause. The
+ * subsequent clause must not be NULL, since its location is read.
+ */
+static void
+pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
+    node->subsequent = subsequent;
+    node->base.location.end = subsequent->base.location.end;
+}
+
+/**
+ * Append an exception node to a rescue node's exception list, and
+ * unconditionally move the end of the node's location to the end of the
+ * appended exception.
+ */
+static void
+pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
+    pm_node_list_append(&node->exceptions, exception);
+    node->base.location.end = exception->location.end;
+}
+
+/**
+ * Allocate and initialize a new RestParameterNode node. When the name token
+ * was not provided, the base is initialized from the operator alone;
+ * otherwise it is initialized from the operator through the name. The name
+ * and its location are derived from the name token via the "optional"
+ * helpers.
+ */
+static pm_rest_parameter_node_t *
+pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
+    pm_rest_parameter_node_t *rest = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
+    const bool named = (name->type != PM_TOKEN_NOT_PROVIDED);
+
+    *rest = (pm_rest_parameter_node_t) {
+        .base = (
+            named
+            ? PM_NODE_INIT_TOKENS(parser, PM_REST_PARAMETER_NODE, 0, operator, name)
+            : PM_NODE_INIT_TOKEN(parser, PM_REST_PARAMETER_NODE, 0, operator)
+        ),
+        .name = pm_parser_optional_constant_id_token(parser, name),
+        .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
+    };
+
+    return rest;
+}
+
+/**
+ * Allocate and initialize a new RetryNode node from the given token, which is
+ * asserted to be the `retry` keyword. The node carries no fields beyond its
+ * base.
+ */
+static pm_retry_node_t *
+pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_RETRY);
+    pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
+
+    *node = (pm_retry_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_RETRY_NODE, 0, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ReturnNode node. The arguments may be NULL,
+ * in which case the base is initialized from the keyword alone; otherwise it
+ * is initialized from the keyword through the arguments.
+ */
+static pm_return_node_t *
+pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
+    pm_return_node_t *result = PM_NODE_ALLOC(parser, pm_return_node_t);
+    const bool has_arguments = (arguments != NULL);
+
+    *result = (pm_return_node_t) {
+        .base = (
+            has_arguments
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_RETURN_NODE, 0, keyword, arguments)
+            : PM_NODE_INIT_TOKEN(parser, PM_RETURN_NODE, 0, keyword)
+        ),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .arguments = arguments
+    };
+
+    return result;
+}
+
+/**
+ * Allocate and initialize a new SelfNode node from the given token, which is
+ * asserted to be the `self` keyword. The node carries no fields beyond its
+ * base.
+ */
+static pm_self_node_t *
+pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_SELF);
+    pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
+
+    *node = (pm_self_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_SELF_NODE, 0, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new ShareableConstantNode node wrapping the given
+ * write node. The shareable constant value is cast to pm_node_flags_t and
+ * stored as the flags of the node, and the base is initialized from the write
+ * node.
+ */
+static pm_shareable_constant_node_t *
+pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
+    pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
+
+    *node = (pm_shareable_constant_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_SHAREABLE_CONSTANT_NODE, (pm_node_flags_t) value, write),
+        .write = write
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new SingletonClassNode node. The base is
+ * initialized from the class keyword and end keyword tokens, the list of
+ * locals is copied by value out of the given pointer, and the locations of
+ * the class keyword, operator and end keyword are recorded alongside the
+ * expression and body.
+ */
+static pm_singleton_class_node_t *
+pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
+    pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
+
+    *node = (pm_singleton_class_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_SINGLETON_CLASS_NODE, 0, class_keyword, end_keyword),
+        .locals = *locals,
+        .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .expression = expression,
+        .body = body,
+        .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new SourceEncodingNode node from the given token,
+ * which is asserted to be the `__ENCODING__` keyword. The node is always
+ * flagged as a static literal.
+ */
+static pm_source_encoding_node_t *
+pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
+    pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
+
+    *node = (pm_source_encoding_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_ENCODING_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new SourceFileNode node from the given token,
+ * which is asserted to be the `__FILE__` keyword. The string flags follow the
+ * parser's frozen_string_literal setting: mutable when it is disabled, frozen
+ * and static literal when it is enabled, and no flags otherwise. The filepath
+ * is copied from the parser.
+ */
+static pm_source_file_node_t *
+pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
+    pm_source_file_node_t *source_file = PM_NODE_ALLOC(parser, pm_source_file_node_t);
+    assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
+
+    pm_node_flags_t flags = 0;
+
+    if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED) {
+        flags |= PM_STRING_FLAGS_MUTABLE;
+    } else if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED) {
+        flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+    }
+
+    *source_file = (pm_source_file_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_FILE_NODE, flags, file_keyword),
+        .filepath = parser->filepath
+    };
+
+    return source_file;
+}
+
+/**
+ * Allocate and initialize a new SourceLineNode node from the given token,
+ * which is asserted to be the `__LINE__` keyword. The node is always flagged
+ * as a static literal.
+ */
+static pm_source_line_node_t *
+pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD___LINE__);
+    pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
+
+    *node = (pm_source_line_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_SOURCE_LINE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+    };
+
+    return node;
+}
+
+/**
+ * Allocate and initialize a new SplatNode node. The expression may be NULL,
+ * in which case the base is initialized from the operator alone; otherwise it
+ * is initialized from the operator through the expression.
+ */
+static pm_splat_node_t *
+pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
+    pm_splat_node_t *splat = PM_NODE_ALLOC(parser, pm_splat_node_t);
+    const bool has_expression = (expression != NULL);
+
+    *splat = (pm_splat_node_t) {
+        .base = (
+            has_expression
+            ? PM_NODE_INIT_TOKEN_NODE(parser, PM_SPLAT_NODE, 0, operator, expression)
+            : PM_NODE_INIT_TOKEN(parser, PM_SPLAT_NODE, 0, operator)
+        ),
+        .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
+        .expression = expression
+    };
+
+    return splat;
+}
+
+/**
+ * Allocate and initialize a new, empty StatementsNode node. The base is
+ * initialized with PM_NODE_INIT_BASE and the body list starts out
+ * zero-initialized; statements are added afterwards with
+ * pm_statements_node_body_append or pm_statements_node_body_prepend.
+ */
+static pm_statements_node_t *
+pm_statements_node_create(pm_parser_t *parser) {
+    pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
+
+    *node = (pm_statements_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_STATEMENTS_NODE, 0),
+        .body = { 0 }
+    };
+
+    return node;
+}
+
+/**
+ * Get the length of the given StatementsNode node's body. A NULL node is
+ * treated as having an empty body and yields 0.
+ */
+static size_t
+pm_statements_node_body_length(pm_statements_node_t *node) {
+    // The NULL check and the size read are deliberately separate: writing
+    // `node && node->body.size` would collapse the result to 0 or 1 instead
+    // of returning the actual number of statements.
+    return (node == NULL) ? 0 : node->body.size;
+}
+
+/**
+ * Overwrite the location of the given StatementsNode with the given start and
+ * end pointers.
+ */
+static void
+pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
+    node->base.location = (pm_location_t) { .start = start, .end = end };
+}
+
+/**
+ * Widen the location of the statements node to cover the statement that is
+ * being added to the list. The start is replaced when the body is still empty
+ * or the statement begins earlier; the end is only ever moved later.
+ */
+static inline void
+pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
+    const bool body_empty = (pm_statements_node_body_length(node) == 0);
+    pm_location_t *bounds = &node->base.location;
+
+    if (body_empty || statement->location.start < bounds->start) {
+        bounds->start = statement->location.start;
+    }
+
+    if (bounds->end < statement->location.end) {
+        bounds->end = statement->location.end;
+    }
+}
+
+/**
+ * Append a new node to the given StatementsNode node's body, widening the
+ * location of the statements node to cover it. If the statement currently at
+ * the end of the body is a break, next, redo, retry, or return, a warning is
+ * emitted on the new statement because it cannot be reached. When newline is
+ * true, the appended statement is flagged with PM_NODE_FLAG_NEWLINE.
+ */
+static void
+pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
+    pm_statements_node_body_update(node, statement);
+
+    if (node->body.size > 0) {
+        const pm_node_t *last = node->body.nodes[node->body.size - 1];
+
+        // Control never falls through any of these statements.
+        const bool last_exits = (
+            PM_NODE_TYPE_P(last, PM_BREAK_NODE) ||
+            PM_NODE_TYPE_P(last, PM_NEXT_NODE) ||
+            PM_NODE_TYPE_P(last, PM_REDO_NODE) ||
+            PM_NODE_TYPE_P(last, PM_RETRY_NODE) ||
+            PM_NODE_TYPE_P(last, PM_RETURN_NODE)
+        );
+
+        if (last_exits) {
+            pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
+        }
+    }
+
+    pm_node_list_append(&node->body, statement);
+
+    if (newline) {
+        pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
+    }
+}
+
+/**
+ * Prepend a new node to the given StatementsNode node's body, widening the
+ * location of the statements node to cover it. The prepended statement is
+ * always flagged with PM_NODE_FLAG_NEWLINE, and no unreachable statement
+ * warning is considered here.
+ */
+static void
+pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
+    pm_statements_node_body_update(node, statement);
+    pm_node_list_prepend(&node->body, statement);
+    pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
+}
+
+/**
+ * Allocate and initialize a new StringNode node with the given unescaped
+ * string, which is copied by value into the node. The string flags follow the
+ * parser's frozen_string_literal setting: mutable when it is disabled, frozen
+ * and static literal when it is enabled, and no flags otherwise. If the
+ * opening or closing token was not provided, the corresponding edge of the
+ * node's location falls back to the content token.
+ */
+static inline pm_string_node_t *
+pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
+    pm_string_node_t *result = PM_NODE_ALLOC(parser, pm_string_node_t);
+
+    pm_node_flags_t flags = 0;
+    if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED) {
+        flags = PM_STRING_FLAGS_MUTABLE;
+    } else if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED) {
+        flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+    }
+
+    // Without delimiters, the node spans just the content.
+    const uint8_t *start = opening->start;
+    if (opening->type == PM_TOKEN_NOT_PROVIDED) {
+        start = content->start;
+    }
+
+    const uint8_t *end = closing->end;
+    if (closing->type == PM_TOKEN_NOT_PROVIDED) {
+        end = content->end;
+    }
+
+    *result = (pm_string_node_t) {
+        .base = PM_NODE_INIT(parser, PM_STRING_NODE, flags, start, end),
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .content_loc = PM_LOCATION_TOKEN_VALUE(content),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = *string
+    };
+
+    return result;
+}
+
+/**
+ * Allocate and initialize a new StringNode node whose unescaped string is
+ * PM_STRING_EMPTY.
+ */
+static pm_string_node_t *
+pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
+}
+
+/**
+ * Allocate a new StringNode node and create it using the current string on the
+ * parser. The parser's current string is copied by value into the node, and
+ * is then reset to PM_STRING_EMPTY on the parser.
+ */
+static pm_string_node_t *
+pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
+    parser->current_string = PM_STRING_EMPTY;
+    return node;
+}
+
+/**
+ * Allocate and initialize a new SuperNode node from the given token, which is
+ * asserted to be the `super` keyword, and the given parsed arguments. The
+ * node spans from the start of the keyword to the end reported by
+ * pm_arguments_end, which is expected to be non-NULL here: a NULL end trips
+ * an assertion, and when assertions are compiled out it is stored as the end
+ * of the node as-is. The parentheses locations, arguments and block are
+ * copied from the arguments struct.
+ */
+static pm_super_node_t *
+pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
+    assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
+    pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
+
+    const uint8_t *end = pm_arguments_end(arguments);
+    if (end == NULL) {
+        assert(false && "unreachable");
+    }
+
+    *node = (pm_super_node_t) {
+        .base = PM_NODE_INIT(parser, PM_SUPER_NODE, 0, keyword->start, end),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .lparen_loc = arguments->opening_loc,
+        .arguments = arguments->arguments,
+        .rparen_loc = arguments->closing_loc,
+        .block = arguments->block
+    };
+
+    return node;
+}
+
+/**
+ * Scan the bytes of a string and report whether every one of them is a
+ * US-ASCII code point, i.e. has its high bit clear. An empty string is
+ * considered ASCII-only.
+ */
+static bool
+pm_ascii_only_p(const pm_string_t *contents) {
+    size_t remaining = pm_string_length(contents);
+    const uint8_t *bytes = pm_string_source(contents);
+
+    while (remaining > 0) {
+        if (*bytes >= 0x80) {
+            return false;
+        }
+
+        bytes++;
+        remaining--;
+    }
+
+    return true;
+}
+
+/**
+ * Validate that the contents of the given symbol are all valid UTF-8. The
+ * contents are walked one character at a time; at the first position where
+ * no valid character width can be determined, a single PM_ERR_INVALID_SYMBOL
+ * error is reported over the given token and scanning stops.
+ */
+static void
+parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
+    const uint8_t *cursor = pm_string_source(contents);
+    const uint8_t *end = cursor + pm_string_length(contents);
+
+    while (cursor < end) {
+        size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
+
+        if (width == 0) {
+            pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+            return;
+        }
+
+        cursor += width;
+    }
+}
+
+/**
+ * Validate that the contents of the given symbol are all valid in the encoding
+ * of the parser. The contents are walked one character at a time using the
+ * encoding's char_width callback; at the first position where no valid
+ * character width can be determined, a single PM_ERR_INVALID_SYMBOL error is
+ * reported over the given token and scanning stops.
+ */
+static void
+parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
+    const pm_encoding_t *encoding = parser->encoding;
+    const uint8_t *cursor = pm_string_source(contents);
+    const uint8_t *end = cursor + pm_string_length(contents);
+
+    while (cursor < end) {
+        size_t width = encoding->char_width(cursor, end - cursor);
+
+        if (width == 0) {
+            pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+            return;
+        }
+
+        cursor += width;
+    }
+}
+
+/**
+ * Ruby "downgrades" the encoding of Symbols to US-ASCII if the associated
+ * encoding is ASCII-compatible and the Symbol consists only of US-ASCII code
+ * points. Otherwise, the encoding may be explicitly set with an escape
+ * sequence.
+ *
+ * Returns the forced-encoding flag to apply to the symbol, or 0 when no
+ * encoding needs to be forced. If the validate flag is set, then it will
+ * check the contents of the symbol to ensure that all characters are valid in
+ * the encoding.
+ */
+static inline pm_node_flags_t
+parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
+    if (parser->explicit_encoding == NULL) {
+        // Ruby stipulates that all source files must use an ASCII-compatible
+        // encoding. Thus, all symbols appearing in source are eligible for
+        // "downgrading" to US-ASCII.
+        if (pm_ascii_only_p(contents)) {
+            return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
+        }
+    } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+        // A Symbol may optionally have its encoding explicitly set. This will
+        // happen if an escape sequence results in a non-ASCII code point.
+        if (validate) {
+            parse_symbol_encoding_validate_utf8(parser, location, contents);
+        }
+
+        return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
+    } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+        return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
+    }
+
+    // Every remaining case keeps the parser's encoding, so that is what the
+    // contents are validated against.
+    if (validate) {
+        parse_symbol_encoding_validate_other(parser, location, contents);
+    }
+
+    return 0;
+}
+
+/**
+ * Validate a regular expression source against an explicit encoding modifier
+ * (one of n, u, e, or s, paired with its matching encoding) and return the
+ * flags to use for the node. Mismatches are reported as errors on the
+ * parser's current token. The given flags are returned unchanged except for
+ * an ASCII-only source without character escapes under the `n` modifier,
+ * which is forced to US-ASCII.
+ */
+static pm_node_flags_t
+parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
+    assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
+            (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
+            (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
+            (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
+
+    if (parser->explicit_encoding != NULL) {
+        // The source contains character escape sequences.
+        // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
+        bool mixed_encoding = false;
+
+        if (mixed_encoding) {
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
+        } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+            // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
+            bool valid_string_in_modifier_encoding = true;
+
+            if (!valid_string_in_modifier_encoding) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
+            }
+        } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
+            if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
+            }
+        }
+
+        // The modifier already determines the encoding, so there is no need
+        // to force the encoding to anything else.
+        return flags;
+    }
+
+    // From here on the source does not contain any character escape
+    // sequences, which has its own validation rules.
+
+    // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
+    // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
+    // the US-ASCII encoding.
+    if (ascii_only) {
+        return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
+    }
+
+    // The source is known to contain non-ASCII bytes at this point.
+    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+    } else if (parser->encoding != modifier_encoding) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
+
+        if (modifier == 'n') {
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
+        }
+    }
+
+    return flags;
+}
+
+/**
+ * Work out the encoding flags for a regular expression, reporting encoding
+ * errors on the parser along the way. When an encoding modifier flag is set,
+ * validation is delegated to the modifier-specific routine. Otherwise, an
+ * ASCII-only source is "downgraded" to US-ASCII, and an explicit encoding (set
+ * by a character escape sequence) may force UTF-8 or binary.
+ */
+static pm_node_flags_t
+parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
+    // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
+    bool unicode_range_ok = true;
+    if (!unicode_range_ok && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
+        return flags;
+    }
+
+    // A US-ASCII source cannot hold multi-byte character literals, although
+    // escape sequences denoting multi-byte characters are permitted. CRuby
+    // keeps processing after detecting this SyntaxError, so the message may be
+    // reported twice; we do the same for compatibility.
+    if (!ascii_only && parser->explicit_encoding == NULL && parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+    }
+
+    // Encoding modifier flags are processed before any explicit encoding set
+    // by character literals; the first matching flag below is the one used.
+    // When a modifier and an explicit encoding agree, no "forced_<encoding>"
+    // flag is set and the caller should consult the modifier flag instead.
+    char modifier = '\0';
+    const pm_encoding_t *modifier_encoding = NULL;
+
+    if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
+        modifier = 'n';
+        modifier_encoding = PM_ENCODING_ASCII_8BIT_ENTRY;
+    } else if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
+        modifier = 'u';
+        modifier_encoding = PM_ENCODING_UTF_8_ENTRY;
+    } else if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
+        modifier = 'e';
+        modifier_encoding = PM_ENCODING_EUC_JP_ENTRY;
+    } else if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
+        modifier = 's';
+        modifier_encoding = PM_ENCODING_WINDOWS_31J_ENTRY;
+    }
+
+    if (modifier_encoding != NULL) {
+        return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, modifier, modifier_encoding);
+    }
+
+    // No encoding modifier is present past this point. Ruby requires source
+    // files to use an ASCII-compatible encoding, so a regular expression made
+    // up solely of US-ASCII code points is "downgraded" to US-ASCII.
+    if (ascii_only) {
+        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
+    }
+
+    // Otherwise the encoding may have been set explicitly by a character
+    // escape sequence in the source string.
+    if (parser->explicit_encoding == NULL) return 0;
+    if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+    }
+    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+        return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
+    }
+
+    return 0;
+}
+
+/**
+ * Build a SymbolNode that copies the given pm_string_t by value as its
+ * unescaped string. The node starts at the opening token and ends at the
+ * closing token, falling back to the value token for whichever is missing.
+ */
+static pm_symbol_node_t *
+pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
+    const uint8_t *start = (opening->type != PM_TOKEN_NOT_PROVIDED ? opening->start : value->start);
+    const uint8_t *end = (closing->type != PM_TOKEN_NOT_PROVIDED ? closing->end : value->end);
+
+    pm_symbol_node_t *symbol = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+    *symbol = (pm_symbol_node_t) {
+        .base = PM_NODE_INIT(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | flags, start, end),
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .value_loc = PM_LOCATION_TOKEN_VALUE(value),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = *unescaped
+    };
+
+    return symbol;
+}
+
+/**
+ * Allocate and initialize a new SymbolNode node whose unescaped string is PM_STRING_EMPTY and which carries no extra flags.
+ */
+static inline pm_symbol_node_t *
+pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
+    return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
+}
+
+/**
+ * Build a SymbolNode from parser->current_string, then reset that string to PM_STRING_EMPTY.
+ */
+static pm_symbol_node_t *
+pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
+    pm_symbol_node_t *symbol = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
+    parser->current_string = PM_STRING_EMPTY;
+    return symbol;
+}
+
+/**
+ * Build a SymbolNode from a label token. For a LABEL token, the final byte of
+ * the token becomes the closing and everything before it becomes both the
+ * value and the (shared) unescaped string. For a MISSING token, the node has
+ * neither an opening nor a closing and keeps an empty unescaped string.
+ */
+static pm_symbol_node_t *
+pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
+    switch (token->type) {
+        case PM_TOKEN_LABEL: {
+            // Split the token just before its final byte.
+            const uint8_t *split = token->end - 1;
+            pm_token_t opening = not_provided(parser);
+            pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = split };
+            pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = split, .end = token->end };
+
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &label, &closing);
+
+            assert((label.end - label.start) >= 0);
+            pm_string_shared_init(&symbol->unescaped, label.start, label.end);
+            pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &label, &symbol->unescaped, false));
+
+            return symbol;
+        }
+        case PM_TOKEN_MISSING: {
+            // Nothing to split: the whole token is used as the value.
+            pm_token_t opening = not_provided(parser);
+            pm_token_t closing = not_provided(parser);
+            pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
+
+            return pm_symbol_node_create(parser, &opening, &label, &closing);
+        }
+        default:
+            assert(false && "unreachable");
+            return NULL;
+    }
+}
+
+/**
+ * Allocate a synthesized SymbolNode, flagged as forced US-ASCII, whose unescaped string wraps the given C string.
+ */
+static pm_symbol_node_t *
+pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
+    pm_symbol_node_t *symbol = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+    *symbol = (pm_symbol_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING),
+        .value_loc = PM_LOCATION_NULL_VALUE(parser),
+        .unescaped = { 0 }
+    };
+
+    const size_t length = strlen(content);
+    pm_string_constant_init(&symbol->unescaped, content, length);
+    return symbol;
+}
+
+/**
+ * Check if the given node is a (possibly interpolated) symbol whose closing location ends with a `:`.
+ */
+static bool
+pm_symbol_node_label_p(pm_node_t *node) {
+    const uint8_t *closing_end = NULL;
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTERPOLATED_SYMBOL_NODE:
+            closing_end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
+            break;
+        case PM_SYMBOL_NODE:
+            closing_end = ((pm_symbol_node_t *) node)->closing_loc.end;
+            break;
+        default:
+            return false;
+    }
+
+    if (closing_end == NULL) return false;
+    return closing_end[-1] == ':';
+}
+
+/**
+ * Convert the given StringNode node to a SymbolNode node. The new node reuses
+ * the string node's content location and unescaped string; the string node
+ * itself is freed.
+ */
+static pm_symbol_node_t *
+pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_symbol_node_t *symbol = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+    *symbol = (pm_symbol_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_SYMBOL_NODE, PM_NODE_FLAG_STATIC_LITERAL, opening, closing),
+        .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
+        .value_loc = node->content_loc,
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = node->unescaped
+    };
+
+    pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
+    pm_node_flags_t encoding_flags = parse_symbol_encoding(parser, &content, &node->unescaped, true);
+    pm_node_flag_set(UP(symbol), encoding_flags);
+
+    // Only the node's own memory is released here. pm_node_destroy is avoided
+    // so that the unescaped string, now referenced by the new node, is kept.
+    xfree(node);
+
+    return symbol;
+}
+
+/**
+ * Convert the given SymbolNode node to a StringNode node, reusing its locations and unescaped string and freeing the symbol node.
+ */
+static pm_string_node_t *
+pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
+    pm_string_node_t *converted = PM_NODE_ALLOC(parser, pm_string_node_t);
+
+    // The string flags follow the frozen_string_literal setting. Any setting
+    // other than enabled or disabled leaves the flags empty.
+    pm_node_flags_t flags;
+    if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED) {
+        flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
+    } else if (parser->frozen_string_literal == PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED) {
+        flags = PM_STRING_FLAGS_MUTABLE;
+    } else {
+        flags = 0;
+    }
+
+    *converted = (pm_string_node_t) {
+        .base = PM_NODE_INIT_NODE(parser, PM_STRING_NODE, flags, node),
+        .opening_loc = node->opening_loc,
+        .content_loc = node->value_loc,
+        .closing_loc = node->closing_loc,
+        .unescaped = node->unescaped
+    };
+
+    // Only the node's own memory is released here. pm_node_destroy is avoided
+    // so that the unescaped string, now referenced by the new node, is kept.
+    xfree(node);
+
+    return converted;
+}
+
+/**
+ * Build a TrueNode initialized from the given `true` keyword token.
+ */
+static pm_true_node_t *
+pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_TRUE);
+
+    pm_true_node_t *result = PM_NODE_ALLOC(parser, pm_true_node_t);
+    *result = (pm_true_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL, token)
+    };
+
+    return result;
+}
+
+/**
+ * Build a synthesized TrueNode, i.e. one that is not created from a token.
+ * It is marked as a static literal.
+ */
+static pm_true_node_t *
+pm_true_node_synthesized_create(pm_parser_t *parser) {
+    pm_true_node_t *result = PM_NODE_ALLOC(parser, pm_true_node_t);
+    *result = (pm_true_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_TRUE_NODE, PM_NODE_FLAG_STATIC_LITERAL)
+    };
+
+    return result;
+}
+
+/**
+ * Build an UndefNode from the given `undef` keyword token, with an empty list of names.
+ */
+static pm_undef_node_t *
+pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
+    assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
+
+    pm_undef_node_t *undef = PM_NODE_ALLOC(parser, pm_undef_node_t);
+    *undef = (pm_undef_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_UNDEF_NODE, 0, token),
+        .names = { 0 },
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(token)
+    };
+
+    return undef;
+}
+
+/**
+ * Append a name to an undef node and move the node's end to the end of that name.
+ */
+static void
+pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
+    pm_node_list_append(&node->names, name);
+    node->base.location.end = name->location.end;
+}
+
+/**
+ * Allocate a new UnlessNode node. It ends with its statements when there are any, otherwise with its predicate.
+ */
+static pm_unless_node_t *
+pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+
+    pm_node_t *end = predicate;
+    if (statements != NULL) end = UP(statements);
+    pm_unless_node_t *unless = PM_NODE_ALLOC(parser, pm_unless_node_t);
+    *unless = (pm_unless_node_t) {
+        .base = PM_NODE_INIT_TOKEN_NODE(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, keyword, end),
+        .predicate = predicate,
+        .statements = statements,
+        .else_clause = NULL,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
+        .end_keyword_loc = { 0 }
+    };
+
+    return unless;
+}
+
+/**
+ * Allocate and initialize a new UnlessNode node in the modifier form, wrapping the modified statement in a new statements node.
+ */
+static pm_unless_node_t *
+pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+    pm_unless_node_t *unless = PM_NODE_ALLOC(parser, pm_unless_node_t);
+
+    pm_statements_node_t *body = pm_statements_node_create(parser);
+    pm_statements_node_body_append(parser, body, statement, true);
+
+    *unless = (pm_unless_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_UNLESS_NODE, PM_NODE_FLAG_NEWLINE, statement, predicate),
+        .predicate = predicate,
+        .statements = body,
+        .else_clause = NULL,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
+        .then_keyword_loc = { 0 },
+        .end_keyword_loc = { 0 }
+    };
+
+    return unless;
+}
+
+static inline void
+pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
+    node->base.location.end = end_keyword->end; // the node now ends where the end keyword ends
+    node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+}
+
+/**
+ * A loop modifier may wrap an expression that contains block exits. Those
+ * exits must be dropped from the list of pending block exits so that they are
+ * not marked as invalid later on.
+ */
+static void
+pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
+    assert(parser->current_block_exits != NULL);
+
+    // Every exit to drop lies inside the statements, and the statements are
+    // what is being modified, so comparing against their start is enough; the
+    // end location never needs checking.
+    const uint8_t *boundary = statements->base.location.start;
+
+    while (parser->current_block_exits->size > 0) {
+        const pm_node_t *last = parser->current_block_exits->nodes[parser->current_block_exits->size - 1];
+        if (last->location.start < boundary) break;
+
+        // Shrinking the size is what removes the exit from the list.
+        parser->current_block_exits->size--;
+    }
+}
+
+/**
+ * Allocate a new UntilNode node initialized from its keyword and closing tokens.
+ */
+static pm_until_node_t *
+pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+    pm_until_node_t *until = PM_NODE_ALLOC(parser, pm_until_node_t);
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+
+    *until = (pm_until_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_UNTIL_NODE, flags, keyword, closing),
+        .predicate = predicate,
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return until;
+}
+
+/**
+ * Allocate a new UntilNode node in the modifier form, which has neither a do keyword nor a closing.
+ */
+static pm_until_node_t *
+pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+    pm_until_node_t *until = PM_NODE_ALLOC(parser, pm_until_node_t);
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+    pm_loop_modifier_block_exits(parser, statements);
+
+    *until = (pm_until_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_UNTIL_NODE, flags, statements, predicate),
+        .predicate = predicate,
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = { 0 },
+        .closing_loc = { 0 }
+    };
+
+    return until;
+}
+
+/**
+ * Build a WhenNode from its keyword token, with no conditions, statements, or then keyword yet.
+ */
+static pm_when_node_t *
+pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
+    pm_when_node_t *when = PM_NODE_ALLOC(parser, pm_when_node_t);
+
+    *when = (pm_when_node_t) {
+        .base = PM_NODE_INIT_TOKEN(parser, PM_WHEN_NODE, 0, keyword),
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .then_keyword_loc = { 0 },
+        .conditions = { 0 },
+        .statements = NULL
+    };
+
+    return when;
+}
+
+/**
+ * Append a new condition to a when node and move the node's end to the end of that condition.
+ */
+static void
+pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
+    pm_node_list_append(&node->conditions, condition);
+    node->base.location.end = condition->location.end;
+}
+
+/**
+ * Record the location of the then keyword on a when node and move the node's end to the end of that keyword.
+ */
+static inline void
+pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
+    node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
+    node->base.location.end = then_keyword->end;
+}
+
+/**
+ * Set the statements list of a when node, extending the node's end if the statements end later than it does.
+ */
+static void
+pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
+    const uint8_t *statements_end = statements->base.location.end;
+    if (node->base.location.end < statements_end) {
+        node->base.location.end = statements_end;
+    }
+    node->statements = statements;
+}
+
+/**
+ * Allocate a new WhileNode node initialized from its keyword and closing tokens.
+ */
+static pm_while_node_t *
+pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+    pm_while_node_t *loop = PM_NODE_ALLOC(parser, pm_while_node_t);
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+
+    *loop = (pm_while_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_WHILE_NODE, flags, keyword, closing),
+        .predicate = predicate,
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
+        .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return loop;
+}
+
+/**
+ * Allocate a new WhileNode node in the modifier form, which has neither a do keyword nor a closing.
+ */
+static pm_while_node_t *
+pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
+    pm_while_node_t *loop = PM_NODE_ALLOC(parser, pm_while_node_t);
+    pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
+    pm_loop_modifier_block_exits(parser, statements);
+
+    *loop = (pm_while_node_t) {
+        .base = PM_NODE_INIT_NODES(parser, PM_WHILE_NODE, flags, statements, predicate),
+        .predicate = predicate,
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .do_keyword_loc = { 0 },
+        .closing_loc = { 0 }
+    };
+
+    return loop;
+}
+
+/**
+ * Build a synthesized while loop. No tokens are involved, so all three locations use PM_LOCATION_NULL_VALUE.
+ */
+static pm_while_node_t *
+pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
+    pm_while_node_t *loop = PM_NODE_ALLOC(parser, pm_while_node_t);
+
+    *loop = (pm_while_node_t) {
+        .base = PM_NODE_INIT_BASE(parser, PM_WHILE_NODE, 0),
+        .predicate = predicate,
+        .statements = statements,
+        .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
+        .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
+        .closing_loc = PM_LOCATION_NULL_VALUE(parser)
+    };
+
+    return loop;
+}
+
+/**
+ * Build an XStringNode that copies the given pm_string_t by value as its
+ * unescaped string. The node is flagged as frozen.
+ */
+static pm_x_string_node_t *
+pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
+    pm_x_string_node_t *xstring = PM_NODE_ALLOC(parser, pm_x_string_node_t);
+
+    *xstring = (pm_x_string_node_t) {
+        .base = PM_NODE_INIT_TOKENS(parser, PM_X_STRING_NODE, PM_STRING_FLAGS_FROZEN, opening, closing),
+        .unescaped = *unescaped,
+        .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
+        .content_loc = PM_LOCATION_TOKEN_VALUE(content),
+        .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
+    };
+
+    return xstring;
+}
+
+/**
+ * Allocate and initialize a new XStringNode node whose unescaped string is PM_STRING_EMPTY.
+ */
+static inline pm_x_string_node_t *
+pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
+}
+
+/**
+ * Allocate a new YieldNode node. Its end is taken from the right parenthesis, else the arguments, else the left parenthesis, else the keyword.
+ */
+static pm_yield_node_t *
+pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
+    // Start from the weakest candidate and let each stronger one override it.
+    const uint8_t *end = keyword->end;
+    if (lparen_loc->start != NULL) {
+        end = lparen_loc->end;
+    }
+    if (arguments != NULL) {
+        end = arguments->base.location.end;
+    }
+    if (rparen_loc->start != NULL) {
+        end = rparen_loc->end;
+    }
+
+    pm_yield_node_t *yield = PM_NODE_ALLOC(parser, pm_yield_node_t);
+    *yield = (pm_yield_node_t) {
+        .base = PM_NODE_INIT(parser, PM_YIELD_NODE, 0, keyword->start, end),
+        .arguments = arguments,
+        .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
+        .lparen_loc = *lparen_loc,
+        .rparen_loc = *rparen_loc
+    };
+
+    return yield;
+}
+
+/**
+ * Search the current scope and then its previous scopes for the given constant
+ * id, stopping after a closed scope. Returns the depth found at, or -1.
+ */
+static int
+pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
+    int depth = 0;
+
+    for (pm_scope_t *scope = parser->current_scope; scope != NULL; scope = scope->previous, depth++) {
+        if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) {
+            return depth;
+        }
+        if (scope->closed) {
+            break;
+        }
+    }
+
+    return -1;
+}
+
+/**
+ * Check if any of the currently visible scopes contain a local variable
+ * described by the given token, returning its depth or -1 if there is none.
+ * This function implicitly inserts a constant into the constant pool.
+ */
+static inline int
+pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
+    return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
+}
+
+/**
+ * Add a constant id to the local table of the current scope, forwarding the given location and read count to pm_locals_write.
+ */
+static inline void
+pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+    pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
+}
+
+/**
+ * Add a local variable from a location to the current scope. Returns its constant id; nothing is added when that id is 0.
+ */
+static pm_constant_id_t
+pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+    pm_constant_id_t id = pm_parser_constant_id_location(parser, start, end);
+    if (id != 0) pm_locals_write(&parser->current_scope->locals, id, start, end, reads);
+    return id;
+}
+
+/**
+ * Add a local variable from a token's start and end to the current scope. Returns what pm_parser_local_add_location returns.
+ */
+static inline pm_constant_id_t
+pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
+    return pm_parser_local_add_location(parser, token->start, token->end, reads);
+}
+
+/**
+ * Add a local variable from an owned string to the current scope, located at parser->start with reads set to 1. Nothing is added when the id is 0.
+ */
+static pm_constant_id_t
+pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
+    pm_constant_id_t id = pm_parser_constant_id_owned(parser, start, length);
+    if (id != 0) pm_locals_write(&parser->current_scope->locals, id, parser->start, parser->start, 1);
+    return id;
+}
+
+/**
+ * Add a local variable from a constant string to the current scope, located at parser->start with reads set to 1. Nothing is added when the id is 0.
+ */
+static pm_constant_id_t
+pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
+    pm_constant_id_t id = pm_parser_constant_id_constant(parser, start, length);
+    if (id != 0) pm_locals_write(&parser->current_scope->locals, id, parser->start, parser->start, 1);
+    return id;
+}
+
+/**
+ * Check whether a parameter name is already a local of the current scope,
+ * after first running the numbered-parameter check on it. An error is added
+ * for a duplicate unless the name is empty or starts with an underscore.
+ *
+ * Returns `true` if this is a duplicate parameter name, otherwise `false`.
+ */
+static bool
+pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
+    // The numbered-parameter check always runs, duplicate or not.
+    pm_refute_numbered_parameter(parser, name->start, name->end);
+
+    // A name that is not yet a local of the current scope is not a duplicate.
+    pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
+    if (pm_locals_find(&parser->current_scope->locals, constant_id) == UINT32_MAX) {
+        return false;
+    }
+
+    // Duplicates are only reported as errors when the name is non-empty and
+    // does not begin with an underscore.
+    if ((name->start < name->end) && (*name->start != '_')) {
+        pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
+    }
+
+    return true;
+}
+
+/**
+ * Pop the current scope off the scope stack, freeing its locals, its implicit parameters list, and the scope itself.
+ */
+static void
+pm_parser_scope_pop(pm_parser_t *parser) {
+    pm_scope_t *popped = parser->current_scope;
+    pm_locals_free(&popped->locals);
+    pm_node_list_free(&popped->implicit_parameters);
+    parser->current_scope = popped->previous;
+    xfree(popped);
+}
+
+/******************************************************************************/
+/* Stack helpers                                                              */
+/******************************************************************************/
+
+/**
+ * Pushes a value onto the stack: the existing bits shift left by one and the value becomes the lowest bit.
+ */
+static inline void
+pm_state_stack_push(pm_state_stack_t *stack, bool value) {
+    *stack = (*stack << 1) | (value & 1);
+}
+
+/**
+ * Pops a value off the stack by shifting all bits right by one, which drops the lowest bit.
+ */
+static inline void
+pm_state_stack_pop(pm_state_stack_t *stack) {
+    *stack >>= 1;
+}
+
+/**
+ * Returns the value at the top of the stack, which is the lowest bit. The stack is not modified.
+ */
+static inline bool
+pm_state_stack_p(const pm_state_stack_t *stack) {
+    return *stack & 1;
+}
+
+static inline void
+pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
+    // Store the negation of the value (to prevent stack overflow); pm_accepts_block_stack_p negates it again on read.
+    pm_state_stack_push(&parser->accepts_block_stack, !value);
+}
+
+static inline void
+pm_accepts_block_stack_pop(pm_parser_t *parser) {
+    pm_state_stack_pop(&parser->accepts_block_stack); // drop the most recently pushed entry
+}
+
+static inline bool
+pm_accepts_block_stack_p(pm_parser_t *parser) {
+    return !pm_state_stack_p(&parser->accepts_block_stack); // entries are stored negated, so negate the top entry back
+}
+
+static inline void
+pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
+    pm_state_stack_push(&parser->do_loop_stack, value); // the value is stored as given
+}
+
+static inline void
+pm_do_loop_stack_pop(pm_parser_t *parser) {
+    pm_state_stack_pop(&parser->do_loop_stack); // drop the most recently pushed entry
+}
+
+static inline bool
+pm_do_loop_stack_p(pm_parser_t *parser) {
+    return pm_state_stack_p(&parser->do_loop_stack); // top entry of the do-loop stack
+}
+
+/******************************************************************************/
+/* Lexer check helpers                                                        */
+/******************************************************************************/
+
+/**
+ * Get the character in the source located at +cursor+. If that position is at
+ * or beyond the end of the source then return '\0'.
+ */
+static inline uint8_t
+peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
+    // Positions at or past the end of the source read as a NUL byte.
+    if (cursor >= parser->end) {
+        return '\0';
+    }
+    return *cursor;
+}
+
+/**
+ * Get the character in the source located at parser->current.end plus the
+ * given offset. If that position is at or beyond the end of the source then
+ * return '\0'. The parser is only read, never modified.
+ */
+static inline uint8_t
+peek_offset(const pm_parser_t *parser, ptrdiff_t offset) {
+    return peek_at(parser, parser->current.end + offset);
+}
+
+/**
+ * Get the next character in the source, which is the one at parser->current.end.
+ * If that position is at or beyond the end of the source then return '\0'.
+ */
+static inline uint8_t
+peek(const pm_parser_t *parser) {
+    return peek_at(parser, parser->current.end);
+}
+
+/**
+ * If the character at parser->current.end equals the given value, advance
+ * parser->current.end by one and return true. Otherwise return false.
+ */
+static inline bool
+match(pm_parser_t *parser, uint8_t value) {
+    if (peek(parser) != value) {
+        return false;
+    }
+    parser->current.end++;
+    return true;
+}
+
+/**
+ * Return the length of the line ending starting at +cursor+: 1 for "\n", 2
+ * for "\r\n", or 0 if there is no line ending there. The parser is only read.
+ */
+static inline size_t
+match_eol_at(const pm_parser_t *parser, const uint8_t *cursor) {
+    const uint8_t byte = peek_at(parser, cursor);
+    if (byte == '\n') return 1;
+
+    // A carriage return only counts when a line feed immediately follows it.
+    if (byte == '\r' && peek_at(parser, cursor + 1) == '\n') return 2;
+
+    return 0;
+}
+
+/**
+ * Return the length of the line ending string starting at
+ * `parser->current.end + offset` (1 for "\n", 2 for "\r\n"), or 0 if it is
+ * not a line ending. This function is intended to be CRLF/LF agnostic.
+ */
+static inline size_t
+match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
+    return match_eol_at(parser, parser->current.end + offset);
+}
+
+/**
+ * Return the length of the line ending string starting at parser->current.end
+ * (1 for "\n", 2 for "\r\n"), or 0 if it is not a line ending. This function
+ * is intended to be CRLF/LF agnostic.
+ */
+static inline size_t
+match_eol(pm_parser_t *parser) {
+    return match_eol_at(parser, parser->current.end);
+}
+
+/**
+ * Find the next `\n` within the +length+ bytes starting at +cursor+. Returns a pointer to it, or NULL if there is none.
+ */
+static inline const uint8_t *
+next_newline(const uint8_t *cursor, ptrdiff_t length) {
+    assert(length >= 0);
+
+    // It's okay to use memchr here because none of the encodings that we
+    // support have \n as a component of a multi-byte character. Note that a
+    // NUL byte does not stop the search; only the length bounds it.
+    return (const uint8_t *) memchr(cursor, '\n', (size_t) length);
+}
+
+/**
+ * This is equivalent to the predicate of warn_balanced in CRuby: a space was seen, the next character is not whitespace, and lex_state_p is false for the CLASS/DOT/FNAME/ENDFN states.
+ */
+static inline bool
+ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
+    return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
+}
+
+/**
+ * Look up the encoding named by the bytes in [start, end) and, if one is
+ * found, make it the encoding of the parser. Returns true when an encoding was
+ * found, and false otherwise (in which case the parser is left untouched).
+ */
+static bool
+parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    const pm_encoding_t *found = pm_encoding_find(start, end);
+    if (found == NULL) return false;
+
+    // Only swap the encoding (and notify the callback, if one is registered)
+    // when it actually differs from the one currently in use.
+    if (found != parser->encoding) {
+        parser->encoding = found;
+        if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
+    }
+
+    // The flag is recomputed on every successful lookup: it is set exactly
+    // when the encoding that was found is not the UTF-8 entry.
+    parser->encoding_changed = (found != PM_ENCODING_UTF_8_ENTRY);
+    return true;
+}
+
+/**
+ * Look for a specific pattern of "coding" and potentially set the encoding on
+ * the parser.
+ *
+ * The current token (parser->current) is scanned, starting one byte past its
+ * start, for the text "coding" (compared with pm_strncasecmp) followed by
+ * optional whitespace, a '=' or ':' separator, optional whitespace, and then a
+ * value made up of '-', '_', and alphanumeric bytes. If the pattern is not
+ * present, the function returns without touching the parser. If the pattern is
+ * present but the value does not name a known encoding, a
+ * PM_ERR_INVALID_ENCODING_MAGIC_COMMENT error is added to the parser.
+ */
+static void
+parser_lex_magic_comment_encoding(pm_parser_t *parser) {
+    // Skip the first byte of the token (presumably the leading '#').
+    const uint8_t *cursor = parser->current.start + 1;
+    const uint8_t *end = parser->current.end;
+
+    // First, locate "coding". Rather than testing every position, we look at
+    // cursor[6] (the byte that would directly follow a 6-byte "coding" window
+    // starting at cursor) and use it to decide how far ahead we can jump.
+    bool separator = false;
+    while (true) {
+        // We need at least 7 bytes: the 6-byte window plus the byte after it.
+        if (end - cursor <= 6) return;
+        switch (cursor[6]) {
+            // The byte is one of the letters of "coding": slide the window
+            // forward just far enough for that letter to land on its position
+            // within the word, then look again.
+            case 'C': case 'c': cursor += 6; continue;
+            case 'O': case 'o': cursor += 5; continue;
+            case 'D': case 'd': cursor += 4; continue;
+            case 'I': case 'i': cursor += 3; continue;
+            case 'N': case 'n': cursor += 2; continue;
+            case 'G': case 'g': cursor += 1; continue;
+            // The byte is a separator: step onto it and leave the switch so
+            // that the six bytes before it get compared against "coding".
+            case '=': case ':':
+                separator = true;
+                cursor += 6;
+                break;
+            // Any other byte: the window is only worth comparing if the byte
+            // that follows it is whitespace. Otherwise keep searching.
+            default:
+                cursor += 6;
+                if (pm_char_is_whitespace(*cursor)) break;
+                continue;
+        }
+        // cursor now points just past the candidate window. On a match we are
+        // done searching; otherwise forget any separator and keep going.
+        if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
+        separator = false;
+    }
+
+    // Next, move to the start of the value. Each pass advances at least one
+    // byte (pre-increment) and then skips whitespace. If the separator has
+    // not been seen yet, the byte we land on must be that separator.
+    while (true) {
+        do {
+            if (++cursor >= end) return;
+        } while (pm_char_is_whitespace(*cursor));
+
+        if (separator) break;
+        if (*cursor != '=' && *cursor != ':') return;
+
+        // NOTE(review): after this increment, the pre-increment at the top of
+        // the loop steps over the byte directly following the separator
+        // without inspecting it. Confirm that this is intended.
+        separator = true;
+        cursor++;
+    }
+
+    // The value is the run of '-', '_', and bytes accepted by the current
+    // encoding's alnum_char, bounded by the end of the token.
+    const uint8_t *value_start = cursor;
+    while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
+
+    if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
+        // If we were unable to parse the encoding value, then we've got an
+        // issue because we didn't understand the encoding that the user was
+        // trying to use. In this case we'll keep using the default encoding but
+        // add an error to the parser to indicate an unsuccessful parse.
+        pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
+    }
+}
+
+/**
+ * The possible outcomes of interpreting the value of a magic comment as a
+ * boolean (see parser_lex_magic_comment_boolean_value).
+ */
+typedef enum {
+    /** The value was "true". */
+    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,
+
+    /** The value was "false". */
+    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,
+
+    /** The value was neither "true" nor "false". */
+    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
+} pm_magic_comment_boolean_value_t;
+
+/**
+ * Classify the value of a magic comment as a boolean. The value is the
+ * +value_length+ bytes starting at +value_start+. It is TRUE when it is exactly
+ * 4 bytes that pm_strncasecmp considers equal to "true", FALSE when it is
+ * exactly 5 bytes that pm_strncasecmp considers equal to "false", and INVALID
+ * in every other case. No parser state is read or modified.
+ */
+static pm_magic_comment_boolean_value_t
+parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
+    switch (value_length) {
+        case 4:
+            if (pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
+            break;
+        case 5:
+            if (pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
+            break;
+        default:
+            break;
+    }
+
+    return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
+}
+
+/**
+ * Returns true if the given byte delimits a magic comment key, which is the
+ * case for a single quote, a double quote, a colon, and a semicolon.
+ */
+static inline bool
+pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
+    switch (b) {
+        case '\'':
+        case '"':
+        case ':':
+        case ';':
+            return true;
+        default:
+            return false;
+    }
+}
+
+/**
+ * Find an emacs magic comment marker (-*-) within the given bounds. If one is
+ * found, it returns a pointer to the start of the marker. Otherwise it returns
+ * NULL.
+ *
+ * The search covers [cursor, end) and the whole three-byte marker has to fit
+ * inside that range. The '-' bytes are located with pm_memchr, which is given
+ * the parser's encoding_changed flag and encoding.
+ */
+static inline const uint8_t *
+parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
+    // The remaining length is compared against 3 (instead of forming
+    // cursor + 3) so that we never compute a pointer that lies more than one
+    // byte past +end+, which would be undefined pointer arithmetic.
+    while ((end - cursor) >= 3 && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
+        // pm_memchr may have found a '-' in the last two bytes of the range,
+        // so the length has to be checked again before reading past it.
+        if ((end - cursor) >= 3 && cursor[1] == '*' && cursor[2] == '-') {
+            return cursor;
+        }
+        cursor++;
+    }
+    return NULL;
+}
+
+/**
+ * Parse the current token on the parser to see if it's a magic comment and
+ * potentially perform some action based on that. A regular expression that this
+ * function is effectively matching is:
+ *
+ *     %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*"
+ *
+ * Every key/value pair that is recognized is appended to
+ * parser->magic_comment_list. In addition, the keys "encoding"/"coding",
+ * "warn_indent", "frozen_string_literal", and "shareable_constant_value"
+ * update the corresponding parser state. Dashes in a key are treated as
+ * underscores when the key is compared against those names.
+ *
+ * +semantic_token_seen+ indicates whether a semantic token has already been
+ * seen. When it is true, a frozen_string_literal comment is ignored with a
+ * warning.
+ *
+ * It returns false when the token is too short, when an opening -*- marker has
+ * no closing marker, when (outside of -*- markers) a key is not followed by a
+ * colon or the value is followed by anything other than whitespace, or when an
+ * encoding key was processed but its value did not name a known encoding.
+ * Otherwise it returns true.
+ */
+static inline bool
+parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
+    bool result = true;
+
+    // Skip the first byte of the token (presumably the leading '#'). Tokens
+    // with 7 or fewer bytes after that are rejected outright.
+    const uint8_t *start = parser->current.start + 1;
+    const uint8_t *end = parser->current.end;
+    if (end - start <= 7) return false;
+
+    const uint8_t *cursor;
+    bool indicator = false;
+
+    // If the comment contains a pair of -*- markers, only the text between
+    // them is considered, and the looser "indicator" rules apply below.
+    if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
+        start = cursor + 3;
+
+        if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
+            end = cursor;
+            indicator = true;
+        } else {
+            // If we have a start marker but not an end marker, then we cannot
+            // have a magic comment.
+            return false;
+        }
+    }
+
+    // Each iteration of this loop attempts to read one key/value pair.
+    cursor = start;
+    while (cursor < end) {
+        // Skip any delimiters and whitespace in front of the key.
+        while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+
+        // The key runs up to the next delimiter or whitespace byte.
+        const uint8_t *key_start = cursor;
+        while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
+
+        const uint8_t *key_end = cursor;
+        while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
+        if (cursor == end) break;
+
+        // The key must be followed by a colon. Between -*- markers a key
+        // without one is skipped; otherwise this is not a magic comment.
+        if (*cursor == ':') {
+            cursor++;
+        } else {
+            if (!indicator) return false;
+            continue;
+        }
+
+        while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
+        if (cursor == end) break;
+
+        const uint8_t *value_start;
+        const uint8_t *value_end;
+
+        if (*cursor == '"') {
+            // A quoted value runs to the closing quote (or to the end of the
+            // range if there is none). A backslash causes the byte that
+            // follows it to be skipped. The quotes are not part of the value.
+            value_start = ++cursor;
+            for (; cursor < end && *cursor != '"'; cursor++) {
+                if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
+            }
+            value_end = cursor;
+            if (cursor < end && *cursor == '"') cursor++;
+        } else {
+            // An unquoted value runs up to the next double quote, semicolon,
+            // or whitespace byte.
+            value_start = cursor;
+            while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
+            value_end = cursor;
+        }
+
+        // Between -*- markers, pairs may be separated by semicolons and
+        // whitespace. Otherwise only trailing whitespace may follow the value.
+        if (indicator) {
+            while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
+        } else {
+            while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
+            if (cursor != end) return false;
+        }
+
+        // Here, we need to do some processing on the key to swap out dashes for
+        // underscores. We only need to do this if there _is_ a dash in the key.
+        pm_string_t key;
+        const size_t key_length = (size_t) (key_end - key_start);
+        const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
+
+        if (dash == NULL) {
+            pm_string_shared_init(&key, key_start, key_end);
+        } else {
+            // If the allocation fails we stop processing the comment and
+            // return the result computed so far.
+            uint8_t *buffer = xmalloc(key_length);
+            if (buffer == NULL) break;
+
+            memcpy(buffer, key_start, key_length);
+            buffer[dash - key_start] = '_';
+
+            while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
+                buffer[dash - key_start] = '_';
+            }
+
+            pm_string_owned_init(&key, buffer, key_length);
+        }
+
+        // Finally, we can start checking the key against the list of known
+        // magic comment keys, and potentially change state based on that.
+        const uint8_t *key_source = pm_string_source(&key);
+        uint32_t value_length = (uint32_t) (value_end - value_start);
+
+        // We only want to attempt to compare against encoding comments if it's
+        // the first line in the file (or the second in the case of a shebang).
+        if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
+            if (
+                (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
+                (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
+            ) {
+                result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
+            }
+        }
+
+        // The remaining keys are dispatched on their length first so that at
+        // most one string comparison is needed per key.
+        if (key_length == 11) {
+            if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
+                switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
+                    case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
+                        PM_PARSER_WARN_TOKEN_FORMAT(
+                            parser,
+                            parser->current,
+                            PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
+                            (int) key_length,
+                            (const char *) key_source,
+                            (int) value_length,
+                            (const char *) value_start
+                        );
+                        break;
+                    case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
+                        parser->warn_mismatched_indentation = false;
+                        break;
+                    case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
+                        parser->warn_mismatched_indentation = true;
+                        break;
+                }
+            }
+        } else if (key_length == 21) {
+            if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
+                // We only want to handle frozen string literal comments if it's
+                // before any semantic tokens have been seen.
+                if (semantic_token_seen) {
+                    pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
+                } else {
+                    switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
+                        case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
+                            PM_PARSER_WARN_TOKEN_FORMAT(
+                                parser,
+                                parser->current,
+                                PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
+                                (int) key_length,
+                                (const char *) key_source,
+                                (int) value_length,
+                                (const char *) value_start
+                            );
+                            break;
+                        case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
+                            parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
+                            break;
+                        case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
+                            parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED;
+                            break;
+                    }
+                }
+            }
+        } else if (key_length == 24) {
+            if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
+                // Walk backwards from the start of the comment over spaces and
+                // tabs to find out whether the comment is alone on its line.
+                // Note that this declaration shadows the outer `cursor` for
+                // the remainder of this block.
+                const uint8_t *cursor = parser->current.start;
+                while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
+
+                // The comment is only honored when nothing but spaces and tabs
+                // precedes it on its line. Otherwise a warning is emitted.
+                if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
+                    pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
+                } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
+                    pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
+                } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
+                    pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
+                } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
+                    pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
+                } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
+                    pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
+                } else {
+                    PM_PARSER_WARN_TOKEN_FORMAT(
+                        parser,
+                        parser->current,
+                        PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
+                        (int) key_length,
+                        (const char *) key_source,
+                        (int) value_length,
+                        (const char *) value_start
+                    );
+                }
+            }
+        }
+
+        // When we're done, we want to free the string in case we had to
+        // allocate memory for it.
+        pm_string_free(&key);
+
+        // Allocate a new magic comment node to append to the parser's list.
+        // The node records key_start (the original key bytes, not the copy in
+        // which dashes were replaced) and value_start. If the allocation
+        // fails, the pair is silently left out of the list.
+        pm_magic_comment_t *magic_comment;
+        if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
+            magic_comment->key_start = key_start;
+            magic_comment->value_start = value_start;
+            magic_comment->key_length = (uint32_t) key_length;
+            magic_comment->value_length = value_length;
+            pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
+        }
+    }
+
+    return result;
+}
+
+/******************************************************************************/
+/* Context manipulations                                                      */
+/******************************************************************************/
+
+/**
+ * For every context, a bitmask of the token types that terminate it. Bit N of
+ * an entry is set when the token type whose enum value is N closes that
+ * context. The table is indexed by pm_context_t and is consulted by
+ * context_terminator.
+ *
+ * Because each entry is 32 bits wide, every token type named here needs to
+ * have an enum value below 32 for its shift to be representable. Presumably
+ * the token type enum is ordered to guarantee this; TODO confirm against the
+ * definition of the token types.
+ */
+static const uint32_t context_terminators[] = {
+    [PM_CONTEXT_NONE] = 0,
+    [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
+    [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+    [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+    [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+    [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
+    [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
+    [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
+    [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+    [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
+    [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+    [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+    [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
+    [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
+    [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
+};
+
+/**
+ * Returns true if the type of the given token is one of the terminators that
+ * context_terminators records for the given context.
+ */
+static inline bool
+context_terminator(pm_context_t context, pm_token_t *token) {
+    // Token types of 32 and above have no bit in the 32-bit masks, so they can
+    // never terminate a context.
+    if (!(token->type < 32)) return false;
+
+    const uint32_t bit = 1U << token->type;
+    return (context_terminators[context] & bit) != 0;
+}
+
+/**
+ * Walk the context stack from the innermost context outwards and return the
+ * first context that the given token terminates. If the token terminates none
+ * of them, PM_CONTEXT_NONE is returned.
+ */
+static pm_context_t
+context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
+    for (pm_context_node_t *node = parser->current_context; node != NULL; node = node->prev) {
+        if (context_terminator(node->context, token)) {
+            return node->context;
+        }
+    }
+
+    return PM_CONTEXT_NONE;
+}
+
+/**
+ * Push a new context onto the parser's context stack. Returns false (leaving
+ * the stack untouched) if the node could not be allocated, and true otherwise.
+ */
+static bool
+context_push(pm_parser_t *parser, pm_context_t context) {
+    pm_context_node_t *pushed = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
+    if (pushed == NULL) return false;
+
+    // The new node always links back to whatever was on top of the stack
+    // before, which is NULL when the stack was empty.
+    *pushed = (pm_context_node_t) { .context = context, .prev = parser->current_context };
+    parser->current_context = pushed;
+
+    return true;
+}
+
+/**
+ * Pop the innermost context off the parser's context stack and free its node.
+ * The stack must not be empty when this is called, since the current node is
+ * dereferenced unconditionally.
+ */
+static void
+context_pop(pm_parser_t *parser) {
+    pm_context_node_t *popped = parser->current_context;
+    pm_context_node_t *below = popped->prev;
+
+    xfree(popped);
+    parser->current_context = below;
+}
+
+/**
+ * Returns true if the given context appears anywhere on the parser's context
+ * stack.
+ */
+static bool
+context_p(const pm_parser_t *parser, pm_context_t context) {
+    for (pm_context_node_t *node = parser->current_context; node != NULL; node = node->prev) {
+        if (node->context == context) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Returns true if, walking the context stack from the innermost context
+ * outwards, one of the method definition contexts is reached before any class,
+ * module, or singleton class context. Returns false if one of the latter is
+ * reached first or if the stack contains neither.
+ */
+static bool
+context_def_p(const pm_parser_t *parser) {
+    for (pm_context_node_t *node = parser->current_context; node != NULL; node = node->prev) {
+        switch (node->context) {
+            case PM_CONTEXT_DEF:
+            case PM_CONTEXT_DEF_PARAMS:
+            case PM_CONTEXT_DEF_ENSURE:
+            case PM_CONTEXT_DEF_RESCUE:
+            case PM_CONTEXT_DEF_ELSE:
+                return true;
+            case PM_CONTEXT_CLASS:
+            case PM_CONTEXT_CLASS_ENSURE:
+            case PM_CONTEXT_CLASS_RESCUE:
+            case PM_CONTEXT_CLASS_ELSE:
+            case PM_CONTEXT_MODULE:
+            case PM_CONTEXT_MODULE_ENSURE:
+            case PM_CONTEXT_MODULE_RESCUE:
+            case PM_CONTEXT_MODULE_ELSE:
+            case PM_CONTEXT_SCLASS:
+            case PM_CONTEXT_SCLASS_ENSURE:
+            case PM_CONTEXT_SCLASS_RESCUE:
+            case PM_CONTEXT_SCLASS_ELSE:
+                return false;
+            default:
+                // Any other context is transparent: keep looking outwards.
+                break;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Returns a human readable string for the given context, used in error
+ * messages.
+ *
+ * All of the else, ensure, and rescue variants of a context share a single
+ * description ("'else' clause", "'ensure' clause", and "'rescue' clause"
+ * respectively). PM_CONTEXT_NONE is not expected to be passed in: it trips an
+ * assertion and, when assertions are disabled, yields an empty string.
+ */
+static const char *
+context_human(pm_context_t context) {
+    // There is deliberately no default label in this switch (presumably so
+    // that compilers can warn when a context is left unhandled).
+    switch (context) {
+        case PM_CONTEXT_NONE:
+            assert(false && "unreachable");
+            return "";
+        case PM_CONTEXT_BEGIN: return "begin statement";
+        case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
+        case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+        case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
+        case PM_CONTEXT_CASE_WHEN: return "'when' clause";
+        case PM_CONTEXT_CASE_IN: return "'in' clause";
+        case PM_CONTEXT_CLASS: return "class definition";
+        case PM_CONTEXT_DEF: return "method definition";
+        case PM_CONTEXT_DEF_PARAMS: return "method parameters";
+        case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
+        case PM_CONTEXT_DEFINED: return "'defined?' expression";
+        // Every kind of else clause.
+        case PM_CONTEXT_ELSE:
+        case PM_CONTEXT_BEGIN_ELSE:
+        case PM_CONTEXT_BLOCK_ELSE:
+        case PM_CONTEXT_CLASS_ELSE:
+        case PM_CONTEXT_DEF_ELSE:
+        case PM_CONTEXT_LAMBDA_ELSE:
+        case PM_CONTEXT_MODULE_ELSE:
+        case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
+        case PM_CONTEXT_ELSIF: return "'elsif' clause";
+        case PM_CONTEXT_EMBEXPR: return "embedded expression";
+        // Every kind of ensure clause.
+        case PM_CONTEXT_BEGIN_ENSURE:
+        case PM_CONTEXT_BLOCK_ENSURE:
+        case PM_CONTEXT_CLASS_ENSURE:
+        case PM_CONTEXT_DEF_ENSURE:
+        case PM_CONTEXT_LAMBDA_ENSURE:
+        case PM_CONTEXT_MODULE_ENSURE:
+        case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
+        case PM_CONTEXT_FOR: return "for loop";
+        case PM_CONTEXT_FOR_INDEX: return "for loop index";
+        case PM_CONTEXT_IF: return "if statement";
+        case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
+        case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
+        case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
+        case PM_CONTEXT_MAIN: return "top level context";
+        case PM_CONTEXT_MODULE: return "module definition";
+        case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
+        case PM_CONTEXT_PARENS: return "parentheses";
+        case PM_CONTEXT_POSTEXE: return "'END' block";
+        case PM_CONTEXT_PREDICATE: return "predicate";
+        case PM_CONTEXT_PREEXE: return "'BEGIN' block";
+        // Every kind of rescue clause, including the rescue modifier.
+        case PM_CONTEXT_BEGIN_RESCUE:
+        case PM_CONTEXT_BLOCK_RESCUE:
+        case PM_CONTEXT_CLASS_RESCUE:
+        case PM_CONTEXT_DEF_RESCUE:
+        case PM_CONTEXT_LAMBDA_RESCUE:
+        case PM_CONTEXT_MODULE_RESCUE:
+        case PM_CONTEXT_RESCUE_MODIFIER:
+        case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
+        case PM_CONTEXT_SCLASS: return "singleton class definition";
+        case PM_CONTEXT_TERNARY: return "ternary expression";
+        case PM_CONTEXT_UNLESS: return "unless statement";
+        case PM_CONTEXT_UNTIL: return "until statement";
+        case PM_CONTEXT_WHILE: return "while statement";
+    }
+
+    // Only reached if +context+ holds a value that matches none of the cases
+    // above.
+    assert(false && "unreachable");
+    return "";
+}
+
+/******************************************************************************/
+/* Specific token lexers                                                      */
+/******************************************************************************/
+
+/**
+ * Report a misplaced underscore that was found while scanning a number.
+ * +invalid+ is NULL when nothing was wrong, in which case this does nothing.
+ * Otherwise it points at the offending byte, and a one-byte error is added to
+ * the parser: the "trailing" variant if that byte is the last one of the
+ * scanned span (string + length - 1), and the "inner" variant otherwise.
+ */
+static inline void
+pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
+    if (invalid == NULL) return;
+
+    pm_diagnostic_id_t diag_id;
+    if (invalid == (string + length - 1)) {
+        diag_id = PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING;
+    } else {
+        diag_id = PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
+    }
+
+    pm_parser_err(parser, invalid, invalid + 1, diag_id);
+}
+
+/**
+ * Scan the binary number that begins at +string+ (bounded by the end of the
+ * source) with pm_strspn_binary_number, report any invalid position it flags
+ * through pm_strspn_number_validate, and return the length that was scanned.
+ */
+static size_t
+pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
+    const uint8_t *flagged = NULL;
+    const ptrdiff_t remaining = parser->end - string;
+
+    const size_t consumed = pm_strspn_binary_number(string, remaining, &flagged);
+    pm_strspn_number_validate(parser, string, consumed, flagged);
+
+    return consumed;
+}
+
+/**
+ * Scan the octal number that begins at +string+ (bounded by the end of the
+ * source) with pm_strspn_octal_number, report any invalid position it flags
+ * through pm_strspn_number_validate, and return the length that was scanned.
+ */
+static size_t
+pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
+    const uint8_t *flagged = NULL;
+    const ptrdiff_t remaining = parser->end - string;
+
+    const size_t consumed = pm_strspn_octal_number(string, remaining, &flagged);
+    pm_strspn_number_validate(parser, string, consumed, flagged);
+
+    return consumed;
+}
+
+/**
+ * Scan the decimal number that begins at +string+ (bounded by the end of the
+ * source) with pm_strspn_decimal_number, report any invalid position it flags
+ * through pm_strspn_number_validate, and return the length that was scanned.
+ */
+static size_t
+pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
+    const uint8_t *flagged = NULL;
+    const ptrdiff_t remaining = parser->end - string;
+
+    const size_t consumed = pm_strspn_decimal_number(string, remaining, &flagged);
+    pm_strspn_number_validate(parser, string, consumed, flagged);
+
+    return consumed;
+}
+
+/**
+ * Scan the hexadecimal number that begins at +string+ (bounded by the end of
+ * the source) with pm_strspn_hexadecimal_number, report any invalid position
+ * it flags through pm_strspn_number_validate, and return the length that was
+ * scanned.
+ */
+static size_t
+pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
+    const uint8_t *flagged = NULL;
+    const ptrdiff_t remaining = parser->end - string;
+
+    const size_t consumed = pm_strspn_hexadecimal_number(string, remaining, &flagged);
+    pm_strspn_number_validate(parser, string, consumed, flagged);
+
+    return consumed;
+}
+
+/**
+ * Lex the optional fractional part (".digits") and the optional exponent part
+ * ("e" or "E", an optional sign, and digits) that may follow an integer,
+ * advancing parser->current.end past whatever is consumed.
+ *
+ * Returns PM_TOKEN_FLOAT if either part was consumed and PM_TOKEN_INTEGER
+ * otherwise. +seen_e+ is set to true when an exponent was consumed and is left
+ * untouched otherwise. An exponent sign that is not followed by a digit is
+ * still consumed, with PM_ERR_INVALID_FLOAT_EXPONENT added to the parser.
+ */
+static pm_token_type_t
+lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
+    pm_token_type_t result = PM_TOKEN_INTEGER;
+
+    // The fractional part. A dot that is not followed by a digit is not part
+    // of the number at all (it is a method call or something else), in which
+    // case we are done and no exponent is looked for.
+    if (peek(parser) == '.') {
+        if (!pm_char_is_decimal_digit(peek_offset(parser, 1))) {
+            return result;
+        }
+
+        parser->current.end += 2;
+        parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+        result = PM_TOKEN_FLOAT;
+    }
+
+    // The exponent part. If there is no exponent marker here, then whatever we
+    // have so far is the final answer.
+    const uint8_t marker = peek(parser);
+    if (marker != 'e' && marker != 'E') {
+        return result;
+    }
+
+    const uint8_t after_marker = peek_offset(parser, 1);
+    if (after_marker == '+' || after_marker == '-') {
+        // Consume the marker and the sign, then require at least one digit.
+        parser->current.end += 2;
+
+        if (pm_char_is_decimal_digit(peek(parser))) {
+            parser->current.end++;
+            parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+        } else {
+            pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
+        }
+    } else if (pm_char_is_decimal_digit(after_marker)) {
+        // Consume the marker. The digits are picked up by the scan.
+        parser->current.end++;
+        parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+    } else {
+        // The marker is followed by neither a sign nor a digit, so it is not
+        // an exponent and does not belong to the number.
+        return result;
+    }
+
+    *seen_e = true;
+    return PM_TOKEN_FLOAT;
+}
+
+/**
+ * Lex the body of a numeric literal whose first digit has already been
+ * consumed (that digit is the byte just before parser->current.end), advancing
+ * parser->current.end past the rest of the number.
+ *
+ * If the first digit is 0, the byte that follows selects the form of the
+ * number: 0d (decimal), 0b (binary), 0o or a bare leading 0 (octal), 0x
+ * (hexadecimal), or a float when it is '.', 'e', or 'E'. For the binary,
+ * octal, and hexadecimal forms parser->integer_base is set accordingly. It is
+ * not modified for any other form. A base prefix that is not followed by a
+ * valid digit adds the matching PM_ERR_INVALID_NUMBER_* error. If the first
+ * digit is not 0, a decimal number and an optional float suffix are lexed.
+ *
+ * Returns PM_TOKEN_FLOAT or PM_TOKEN_INTEGER. +seen_e+ is always written: it
+ * is reset to false up front and set to true if an exponent was consumed.
+ */
+static pm_token_type_t
+lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
+    pm_token_type_t type = PM_TOKEN_INTEGER;
+    *seen_e = false;
+
+    if (peek_offset(parser, -1) == '0') {
+        // The next byte is read through peek(), like every other read in this
+        // function, rather than by dereferencing parser->current.end directly,
+        // so that the read is subject to whatever checks peek_at performs when
+        // the 0 is the very last byte of the input.
+        switch (peek(parser)) {
+            // 0d1111 is a decimal number
+            case 'd':
+            case 'D':
+                parser->current.end++;
+                if (pm_char_is_decimal_digit(peek(parser))) {
+                    parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+                } else {
+                    // Swallow a stray underscore so that it becomes part of
+                    // the token that the error is reported on.
+                    match(parser, '_');
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
+                }
+
+                break;
+
+            // 0b1111 is a binary number
+            case 'b':
+            case 'B':
+                parser->current.end++;
+                if (pm_char_is_binary_digit(peek(parser))) {
+                    parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
+                } else {
+                    match(parser, '_');
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
+                }
+
+                parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
+                break;
+
+            // 0o1111 is an octal number
+            case 'o':
+            case 'O':
+                parser->current.end++;
+                if (pm_char_is_octal_digit(peek(parser))) {
+                    parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
+                } else {
+                    match(parser, '_');
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
+                }
+
+                parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+                break;
+
+            // 01111 is an octal number
+            case '_':
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+                parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
+                parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+                break;
+
+            // 0x1111 is a hexadecimal number
+            case 'x':
+            case 'X':
+                parser->current.end++;
+                if (pm_char_is_hexadecimal_digit(peek(parser))) {
+                    parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
+                } else {
+                    match(parser, '_');
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
+                }
+
+                parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
+                break;
+
+            // 0.xxx and 0exxx are floats
+            case '.':
+            case 'e':
+            case 'E':
+                type = lex_optional_float_suffix(parser, seen_e);
+                break;
+
+            // Anything else means that the number is just 0.
+            default:
+                break;
+        }
+    } else {
+        // If it didn't start with a 0, then we'll lex as far as we can into a
+        // decimal number.
+        parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+
+        // Afterward, we'll lex as far as we can into an optional float suffix.
+        type = lex_optional_float_suffix(parser, seen_e);
+    }
+
+    // At this point we have a completed number, but we want to provide the user
+    // with a good experience if they put an additional .xxx fractional
+    // component on the end, so we'll check for that here. The extra component
+    // is reported as an error but is not consumed.
+    if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
+        const uint8_t *fraction_start = parser->current.end;
+        const uint8_t *fraction_end = parser->current.end + 2;
+        fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
+        pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
+    }
+
+    return type;
+}
+
+/**
+ * Lex a numeric literal, including an optional trailing `r` (rational) and/or
+ * `i` (imaginary) suffix, and return the resulting token type.
+ *
+ * The integer base recorded on the parser is reset to decimal before the
+ * prefix is lexed. A suffix is only kept when the byte following it cannot
+ * continue an identifier; otherwise the token ends right after the number and
+ * the type produced by lex_numeric_prefix is returned.
+ */
+static pm_token_type_t
+lex_numeric(pm_parser_t *parser) {
+    parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+
+    // With no bytes left there is nothing beyond what has been consumed.
+    if (parser->current.end >= parser->end) {
+        return PM_TOKEN_INTEGER;
+    }
+
+    bool seen_e = false;
+    const pm_token_type_t number_type = lex_numeric_prefix(parser, &seen_e);
+    const bool is_integer = (number_type == PM_TOKEN_INTEGER);
+
+    // Remember where the bare number stops so the suffix can be undone.
+    const uint8_t *number_end = parser->current.end;
+    pm_token_type_t suffixed_type = number_type;
+
+    // A float that used an exponent may not take the rational suffix, so in
+    // that case the `r` is not even attempted.
+    if ((is_integer || !seen_e) && match(parser, 'r')) {
+        if (match(parser, 'i')) {
+            suffixed_type = is_integer ? PM_TOKEN_INTEGER_RATIONAL_IMAGINARY : PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
+        } else {
+            suffixed_type = is_integer ? PM_TOKEN_INTEGER_RATIONAL : PM_TOKEN_FLOAT_RATIONAL;
+        }
+    } else if (match(parser, 'i')) {
+        suffixed_type = is_integer ? PM_TOKEN_INTEGER_IMAGINARY : PM_TOKEN_FLOAT_IMAGINARY;
+    }
+
+    // If what follows is a non-ASCII byte, an ASCII letter, or an underscore,
+    // then the suffix is discarded and the token ends at the bare number.
+    const uint8_t next = peek(parser);
+    const bool is_letter = (next >= 'a' && next <= 'z') || (next >= 'A' && next <= 'Z');
+
+    if (next != '\0' && (next >= 0x80 || is_letter || next == '_')) {
+        parser->current.end = number_end;
+        return number_type;
+    }
+
+    return suffixed_type;
+}
+
+/**
+ * Lex the name portion of a global variable, starting at parser->current.end,
+ * and return the token type that it forms. Judging by the diagnostics below,
+ * the leading `$` has already been consumed by the caller.
+ *
+ * Depending on the name and the lex state this yields a global variable, a
+ * back reference, or a numbered reference. Malformed names add an error to the
+ * parser but still yield PM_TOKEN_GLOBAL_VARIABLE.
+ */
+static pm_token_type_t
+lex_global_variable(pm_parser_t *parser) {
+    // Nothing at all follows the sigil.
+    if (parser->current.end >= parser->end) {
+        pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
+        return PM_TOKEN_GLOBAL_VARIABLE;
+    }
+
+    // Names of the form "$-x" take exactly one identifier character. Every
+    // other name may consume as many identifier characters as are present.
+    bool single_character = false;
+
+    switch (*parser->current.end) {
+        case '~':  // $~: match-data
+        case '*':  // $*: argv
+        case '$':  // $$: pid
+        case '?':  // $?: last status
+        case '!':  // $!: error string
+        case '@':  // $@: error position
+        case '/':  // $/: input record separator
+        case '\\': // $\: output record separator
+        case ';':  // $;: field separator
+        case ',':  // $,: output field separator
+        case '.':  // $.: last read line number
+        case '=':  // $=: ignorecase
+        case ':':  // $:: load path
+        case '<':  // $<: reading filename
+        case '>':  // $>: default output handle
+        case '\"': // $": already loaded files
+            parser->current.end++;
+            return PM_TOKEN_GLOBAL_VARIABLE;
+
+        case '&':  // $&: last match
+        case '`':  // $`: string before last match
+        case '\'': // $': string after last match
+        case '+':  // $+: string matches last paren.
+            parser->current.end++;
+            if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
+                return PM_TOKEN_GLOBAL_VARIABLE;
+            }
+            return PM_TOKEN_BACK_REFERENCE;
+
+        case '0': {
+            parser->current.end++;
+
+            // Swallow any identifier characters that trail the 0 so that the
+            // whole run is reported in a single diagnostic.
+            const uint8_t *after_zero = parser->current.end;
+            size_t width;
+
+            while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
+                parser->current.end += width;
+            }
+
+            if (parser->current.end != after_zero) {
+                // $0 isn't allowed to be followed by anything.
+                pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
+            }
+
+            return PM_TOKEN_GLOBAL_VARIABLE;
+        }
+
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
+            if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
+                return PM_TOKEN_GLOBAL_VARIABLE;
+            }
+            return PM_TOKEN_NUMBERED_REFERENCE;
+
+        case '-':
+            parser->current.end++;
+            single_character = true;
+            PRISM_FALLTHROUGH
+        default: {
+            size_t width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end);
+
+            if (width > 0) {
+                // The first identifier character is always consumed; the rest
+                // only when more than one character is permitted.
+                parser->current.end += width;
+
+                while (!single_character && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
+                    parser->current.end += width;
+                }
+
+                return PM_TOKEN_GLOBAL_VARIABLE;
+            }
+
+            if (pm_char_is_whitespace(peek(parser))) {
+                // A $ followed by whitespace is not a valid global variable.
+                pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
+                return PM_TOKEN_GLOBAL_VARIABLE;
+            }
+
+            // A $ followed by something that is not recognized as part of a
+            // global variable name. The diagnostic covers the token so far
+            // plus the width reported for the offending character.
+            pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+            const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+            PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
+
+            return PM_TOKEN_GLOBAL_VARIABLE;
+        }
+    }
+}
+
+/**
+ * Compare the current token against a single keyword and, on a match, update
+ * the lex state and report which token type was found. When the bytes do not
+ * match, nothing is changed and PM_TOKEN_EOF is returned.
+ *
+ * * `parser` - the parser object
+ * * `current_start` - pointer to the first byte of the current token
+ * * `value` - the keyword text to compare against
+ * * `vlen` - the number of bytes to compare
+ * * `state` - the lex state to move to when the keyword matches
+ * * `type` - the token type of the keyword
+ * * `modifier_type` - the token type of the keyword's modifier form, or
+ *     PM_TOKEN_EOF when there is no modifier form
+ */
+static inline pm_token_type_t
+lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
+    if (memcmp(current_start, value, vlen) != 0) {
+        return PM_TOKEN_EOF;
+    }
+
+    const pm_lex_state_t previous_state = parser->lex_state;
+
+    // In the FNAME state the keyword only moves the lexer to the ENDFN state
+    // and never takes its modifier form.
+    if (previous_state & PM_LEX_STATE_FNAME) {
+        lex_state_set(parser, PM_LEX_STATE_ENDFN);
+        return type;
+    }
+
+    lex_state_set(parser, state);
+    if (state == PM_LEX_STATE_BEG) {
+        parser->command_start = true;
+    }
+
+    // The modifier form is chosen when one exists and the state we came from
+    // had none of the BEG, LABELED, or CLASS bits set.
+    const bool has_modifier = (modifier_type != PM_TOKEN_EOF);
+    const bool came_from_beginning = (previous_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS)) != 0;
+
+    if (has_modifier && !came_from_beginning) {
+        lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+        return modifier_type;
+    }
+
+    return type;
+}
+
+static pm_token_type_t
+lex_identifier(pm_parser_t *parser, bool previous_command_start) {
+    /* Lex as far as we can into the current identifier, then classify it. The
+     * result is one of: a label, a method name ending in ! or ?, an identifier
+     * (optionally ending in = in the FNAME state), one of the keywords listed
+     * below, or a constant. parser->current.end is advanced past everything
+     * that was consumed.
+     */
+    size_t width;
+    const uint8_t *end = parser->end;
+    const uint8_t *current_start = parser->current.start;
+    const uint8_t *current_end = parser->current.end;
+    bool encoding_changed = parser->encoding_changed; // picks the encoding-aware helpers over the UTF-8 specific ones
+
+    if (encoding_changed) {
+        while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
+            current_end += width;
+        }
+    } else {
+        while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
+            current_end += width;
+        }
+    }
+    parser->current.end = current_end;
+
+    // Now cache the length of the identifier so that we can quickly compare it
+    // against known keywords.
+    width = (size_t) (current_end - current_start);
+
+    if (current_end < end) {
+        if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) { // a ! or ? is only taken when the byte after it is not '='
+            // First we'll attempt to extend the identifier by a ! or ?. Then we'll
+            // check if we're returning the defined? keyword or just an identifier.
+            width++;
+
+            if (
+                ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
+                (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
+            ) {
+                // If we're in a position where we can accept a : at the end of an
+                // identifier, then we'll optionally accept it.
+                lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
+                (void) match(parser, ':');
+                return PM_TOKEN_LABEL;
+            }
+
+            if (parser->lex_state != PM_LEX_STATE_DOT) { // defined? is not treated as a keyword in the DOT state
+                if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
+                    return PM_TOKEN_KEYWORD_DEFINED;
+                }
+            }
+
+            return PM_TOKEN_METHOD_NAME;
+        }
+
+        if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
+            // If we're in a position where we can accept a = at the end of an
+            // identifier, then we'll optionally accept it. The = is left alone when it is followed by ~ or >, or by another = that is not itself followed by >.
+            return PM_TOKEN_IDENTIFIER;
+        }
+
+        if (
+            ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
+            peek(parser) == ':' && peek_offset(parser, 1) != ':'
+        ) {
+            // If we're in a position where we can accept a : at the end of an
+            // identifier, then we'll optionally accept it.
+            lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
+            (void) match(parser, ':');
+            return PM_TOKEN_LABEL;
+        }
+    }
+
+    if (parser->lex_state != PM_LEX_STATE_DOT) { // keywords are not recognized in the DOT state
+        pm_token_type_t type;
+        switch (width) { // only keywords whose length equals the identifier's length are compared
+            case 2:
+                if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
+                    if (pm_do_loop_stack_p(parser)) {
+                        return PM_TOKEN_KEYWORD_DO_LOOP;
+                    }
+                    return PM_TOKEN_KEYWORD_DO;
+                }
+
+                if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                break;
+            case 3:
+                if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                break;
+            case 4:
+                if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                break;
+            case 5:
+                if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                break;
+            case 6:
+                if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
+                break;
+            case 8:
+                if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                break;
+            case 12:
+                if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
+                break;
+        }
+    }
+
+    if (encoding_changed) { // not a keyword: the isupper_char check on the first character decides constant vs. identifier
+        return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
+    }
+    return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
+}
+
+/**
+ * Returns true when the token currently being considered begins either at the
+ * very start of the source or immediately after a newline byte.
+ */
+static bool
+current_token_starts_line(pm_parser_t *parser) {
+    const uint8_t *token_start = parser->current.start;
+
+    // The first byte of the source has no preceding byte to inspect.
+    if (token_start == parser->start) {
+        return true;
+    }
+
+    return token_start[-1] == '\n';
+}
+
+/**
+ * Called when a # is encountered while lexing something like a string, to
+ * decide whether it begins an interpolation. The returned token type tells the
+ * caller what was found:
+ *
+ * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The
+ *     caller should keep lexing.
+ * * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point, or
+ *     content precedes the interpolation and has to be emitted first. The
+ *     caller should return this token type.
+ * * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller
+ *     should return this token type.
+ * * PM_TOKEN_EMBVAR - An embedded variable was found. The caller should return
+ *     this token type.
+ *
+ * In every case parser->current.end is updated to where the caller should
+ * resume from.
+ */
+static pm_token_type_t
+lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
+    const uint8_t *limit = parser->end;
+    const uint8_t *after_pound = pound + 1;
+
+    // A # that is the final byte of the source is plain string content.
+    if (after_pound >= limit) {
+        parser->current.end = after_pound;
+        return PM_TOKEN_STRING_CONTENT;
+    }
+
+    const uint8_t sigil = *after_pound;
+
+    if (sigil == '{') {
+        // The start of an embedded expression. Content that was consumed
+        // before the # has to be returned as string content first.
+        if (pound > parser->current.start) {
+            parser->current.end = pound;
+            return PM_TOKEN_STRING_CONTENT;
+        }
+
+        parser->enclosure_nesting++;
+
+        // Skip past the #{ and switch to lexing the embedded expression.
+        lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
+        parser->current.end = pound + 2;
+        parser->command_start = true;
+        pm_do_loop_stack_push(parser, false);
+        return PM_TOKEN_EMBEXPR_BEGIN;
+    }
+
+    if (sigil != '@' && sigil != '$') {
+        // A # that doesn't constitute interpolation. Returning the not
+        // provided token type tells the consumer to keep lexing forward.
+        parser->current.end = after_pound;
+        return PM_TOKEN_NOT_PROVIDED;
+    }
+
+    // Both variable forms need at least one byte after the sigil. Without it
+    // the # is returned as string content.
+    if (pound + 2 >= limit) {
+        parser->current.end = after_pound;
+        return PM_TOKEN_STRING_CONTENT;
+    }
+
+    bool found_variable;
+
+    if (sigil == '@') {
+        // An instance variable, or a class variable when a second @ follows
+        // and there is still a byte after it.
+        const uint8_t *name = pound + 2;
+        if (*name == '@' && pound + 3 < limit) name++;
+
+        found_variable = char_is_identifier_start(parser, name, limit - name) != 0;
+    } else {
+        // A global variable. A - directly after the $ shifts the byte that
+        // has to start an identifier one position further along.
+        const uint8_t first = pound[2];
+        const uint8_t *name = pound + 2;
+
+        if (first == '-') {
+            if (pound + 3 >= limit) {
+                parser->current.end = pound + 2;
+                return PM_TOKEN_STRING_CONTENT;
+            }
+
+            name++;
+        }
+
+        // Either the checked byte starts an identifier, or there is no - and
+        // the byte after the $ is a decimal digit or a global name
+        // punctuation character.
+        found_variable = (
+            char_is_identifier_start(parser, name, limit - name) ||
+            (first != '-' && (pm_char_is_decimal_digit(first) || char_is_global_name_punctuation(first)))
+        );
+    }
+
+    if (!found_variable) {
+        // Something like "#@-" or a #$ that does not name a global variable.
+        // This is regular string content and the caller should keep lexing.
+        parser->current.end = after_pound;
+        return PM_TOKEN_NOT_PROVIDED;
+    }
+
+    // Content consumed before the # has to be returned as string content
+    // before the embedded variable itself can be.
+    if (pound > parser->current.start) {
+        parser->current.end = pound;
+        return PM_TOKEN_STRING_CONTENT;
+    }
+
+    // Return the embedded variable token and switch to the embedded variable
+    // lex mode.
+    lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
+    parser->current.end = after_pound;
+    return PM_TOKEN_EMBVAR;
+}
+
+static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0; // no bits set
+static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1; // escape_byte masks the value with 0x9f when set
+static const uint8_t PM_ESCAPE_FLAG_META = 0x2; // escape_byte sets the 0x80 bit of the value when set
+static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4; // makes escape_write_unicode switch the explicit encoding to UTF-8 even below 0x80
+static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8; // makes escape_write_byte also write a \xNN form into the regular expression buffer
+
+/**
+ * This is a lookup table for whether or not an ASCII character is printable.
+ * It has one entry for each byte value from 0x00 through 0x7F, sixteen per
+ * row. The entries that are set are 0x09 through 0x0D and 0x20 through 0x7E,
+ * with the exception of 0x5C (the backslash), which is not set.
+ */
+static const bool ascii_printable_chars[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, // 0x00 - 0x0F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 0x2F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 - 0x3F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 0x50 - 0x5F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 // 0x70 - 0x7F
+};
+
+/**
+ * Returns true if the given byte is below 0x80 and is marked as printable in
+ * the ascii_printable_chars lookup table.
+ */
+static inline bool
+char_is_ascii_printable(const uint8_t b) {
+    // Bytes with the top bit set fall outside of the lookup table.
+    if (b >= 0x80) {
+        return false;
+    }
+
+    return ascii_printable_chars[b];
+}
+
+/**
+ * Convert a hexadecimal digit character into the number that it stands for,
+ * e.g. '7' becomes 7, and both 'a' and 'A' become 10. The input is not
+ * validated here.
+ */
+static inline uint8_t
+escape_hexadecimal_digit(const uint8_t value) {
+    // Anything up to '9' is treated as a decimal digit.
+    if (value <= '9') {
+        return (uint8_t) (value - '0');
+    }
+
+    // The low three bits of a letter give its 1-based position, which is the
+    // same for upper and lower case, so 'a'/'A' yield 1 + 9 == 10.
+    return (uint8_t) ((value & 0x7) + 9);
+}
+
+/**
+ * Decode the `length` hexadecimal digits found at `string` into a Unicode
+ * codepoint and return it. This function assumes that the characters have
+ * already been validated as hexadecimal digits.
+ *
+ * If the decoded value falls into the surrogate range (0xD800 through 0xDFFF)
+ * an error is added to the parser and 0xFFFD is returned instead. The error
+ * covers `error_location` when one is given, and the digits otherwise.
+ */
+static inline uint32_t
+escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location) {
+    const uint8_t *digits_end = string + length;
+    uint32_t codepoint = 0;
+
+    // Each digit contributes the next four low bits of the codepoint.
+    for (const uint8_t *digit = string; digit < digits_end; digit++) {
+        codepoint = (codepoint << 4) | escape_hexadecimal_digit(*digit);
+    }
+
+    const bool is_surrogate = (codepoint >= 0xD800) && (codepoint <= 0xDFFF);
+    if (!is_surrogate) {
+        return codepoint;
+    }
+
+    // Surrogates are not valid codepoints in their own right.
+    if (error_location == NULL) {
+        pm_parser_err(parser, string, digits_end, PM_ERR_ESCAPE_INVALID_UNICODE);
+    } else {
+        pm_parser_err(parser, error_location->start, error_location->end, PM_ERR_ESCAPE_INVALID_UNICODE);
+    }
+
+    return 0xFFFD;
+}
+
+/**
+ * Apply the control and meta escape flags to a single byte and return the
+ * result. The control flag clears bits 0x20 and 0x40 of the value; the meta
+ * flag sets bit 0x80. Other flags have no effect here.
+ */
+static inline uint8_t
+escape_byte(uint8_t value, const uint8_t flags) {
+    uint8_t escaped = value;
+
+    if ((flags & PM_ESCAPE_FLAG_CONTROL) != 0) {
+        escaped = (uint8_t) (escaped & 0x9f);
+    }
+
+    if ((flags & PM_ESCAPE_FLAG_META) != 0) {
+        escaped = (uint8_t) (escaped | 0x80);
+    }
+
+    return escaped;
+}
+
+/**
+ * Append the given unicode codepoint to the buffer. `start` and `end` delimit
+ * the source range used for any error that is reported.
+ *
+ * If the codepoint cannot be appended, an error is added to the parser and the
+ * bytes 0xEF 0xBF 0xBD are appended in its place.
+ */
+static inline void
+escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
+    // A \u escape in a string-like structure implicitly switches the encoding
+    // to UTF-8 when its value is >= 0x80 or when it appears in a character
+    // literal.
+    const bool forces_utf8 = (value >= 0x80) || ((flags & PM_ESCAPE_FLAG_SINGLE) != 0);
+
+    if (forces_utf8) {
+        // An explicit encoding other than UTF-8 was already locked in.
+        const bool conflicts = (parser->explicit_encoding != NULL) && (parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY);
+
+        if (conflicts) {
+            PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+        }
+
+        parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
+    }
+
+    if (pm_buffer_append_unicode_codepoint(buffer, value)) {
+        return;
+    }
+
+    // The codepoint could not be written, so report it and write a fixed
+    // three-byte sequence instead.
+    pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
+    pm_buffer_append_byte(buffer, 0xEF);
+    pm_buffer_append_byte(buffer, 0xBF);
+    pm_buffer_append_byte(buffer, 0xBD);
+}
+
+/**
+ * Append a byte to the unescape buffer. A non-ASCII byte (i.e., one with the
+ * top bit set) locks the explicit encoding to the parser's current encoding.
+ * If the explicit encoding had previously been locked to UTF-8 while the
+ * parser's encoding is something else, a mixed encoding error is added first.
+ */
+static inline void
+escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
+    const bool non_ascii = (byte >= 0x80);
+
+    if (non_ascii) {
+        const bool locked_to_utf8 = (parser->explicit_encoding != NULL) && (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY);
+
+        if (locked_to_utf8 && (parser->encoding != PM_ENCODING_UTF_8_ENTRY)) {
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+        }
+
+        parser->explicit_encoding = parser->encoding;
+    }
+
+    pm_buffer_append_byte(buffer, byte);
+}
+
+/**
+ * Write an escaped byte to the unescape buffer and, for regular expressions,
+ * to the regular expression buffer as well.
+ *
+ * The regular expression engine doesn't support the same escape sequences as
+ * Ruby does, so the escape is first resolved to a byte and then written out in
+ * a form the engine understands. For example, in Ruby if we have:
+ *
+ *     /\M-\C-?/
+ *
+ * then the first byte is actually 255, so this is rewritten as:
+ *
+ *     /\xFF/
+ *
+ * Note that in this case a literal \ byte is written into the regular
+ * expression source so that the engine will perform its own unescaping.
+ */
+static inline void
+escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
+    const bool in_regexp = (flags & PM_ESCAPE_FLAG_REGEXP) != 0;
+
+    if (in_regexp) {
+        pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
+    }
+
+    escape_write_byte_encoded(parser, buffer, byte);
+}
+
+/**
+ * Write the character at parser->current.end, which is the target of an
+ * escape, into the buffer(s) and advance past it.
+ *
+ * A single-byte character has the escape flags applied to it. A multibyte
+ * character is copied through untouched. A character with no valid width adds
+ * an error and is skipped by one byte.
+ */
+static inline void
+escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
+    const uint8_t *character = parser->current.end;
+
+    const size_t width = parser->encoding_changed
+        ? parser->encoding->char_width(character, parser->end - character)
+        : pm_encoding_utf_8_char_width(character, parser->end - character);
+
+    if (width == 0) {
+        // Assume the next character wasn't meant to be part of this escape
+        // sequence since it is invalid. Add an error and move on.
+        parser->current.end++;
+        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+        return;
+    }
+
+    if (width == 1) {
+        // Step over the byte before writing it, so that the write sees the
+        // token as already including it.
+        parser->current.end++;
+        escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*character, flags));
+        return;
+    }
+
+    // Valid multibyte character.  Just ignore escape.
+    pm_buffer_t *target = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
+    pm_buffer_append_bytes(target, character, width);
+    parser->current.end += width;
+}
+
+/**
+ * Warn about using a space or a tab character in an escape, as opposed to using
+ * \\s or \\t. Note that we can't quite copy the source because the warning
+ * message replaces \\c with \\C, so the prefixes are rebuilt from the flags:
+ * "\\C-" when the control flag is set, otherwise "\\M-" when the meta flag is
+ * set, otherwise the empty string.
+ */
+static void
+escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
+    // Prefix derived from the flags that were already in effect.
+    const char *outer_prefix = "";
+    if (flags & PM_ESCAPE_FLAG_CONTROL) {
+        outer_prefix = "\\C-";
+    } else if (flags & PM_ESCAPE_FLAG_META) {
+        outer_prefix = "\\M-";
+    }
+
+    // Prefix derived from the flag of the escape being read right now.
+    const char *inner_prefix = "";
+    if (flag & PM_ESCAPE_FLAG_CONTROL) {
+        inner_prefix = "\\C-";
+    } else if (flag & PM_ESCAPE_FLAG_META) {
+        inner_prefix = "\\M-";
+    }
+
+    PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_INVALID_CHARACTER, outer_prefix, inner_prefix, type);
+}
+
+/**
+ * Read the value of an escape into the buffer.
+ *
+ * This dispatches on the byte at the current lexer position, which is the
+ * first byte after the backslash (callers consume the backslash before calling
+ * in). parser->current.end is advanced past whatever is consumed. Unescaped
+ * bytes are written to `buffer`. When `flags` contains PM_ESCAPE_FLAG_REGEXP
+ * the regular expression source is additionally written to
+ * `regular_expression_buffer`. The \\c, \\C- and \\M- forms recurse into this
+ * function with PM_ESCAPE_FLAG_CONTROL or PM_ESCAPE_FLAG_META added to `flags`
+ * when they are followed by another backslash escape.
+ */
+static void
+escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
+    uint8_t peeked = peek(parser);
+    switch (peeked) {
+        // Single-character escapes: consume the character and write the byte
+        // it stands for, passed through escape_byte with the current flags.
+        case '\\': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
+            return;
+        }
+        case '\'': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
+            return;
+        }
+        case 'a': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
+            return;
+        }
+        case 'b': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
+            return;
+        }
+        case 'e': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
+            return;
+        }
+        case 'f': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
+            return;
+        }
+        case 'n': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
+            return;
+        }
+        case 'r': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
+            return;
+        }
+        case 's': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
+            return;
+        }
+        case 't': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
+            return;
+        }
+        case 'v': {
+            parser->current.end++;
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
+            return;
+        }
+        // Octal escape: one to three octal digits. The accumulator is a
+        // uint8_t and each shift is cast back to uint8_t, so only the low
+        // eight bits of the value are kept.
+        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
+            uint8_t value = (uint8_t) (*parser->current.end - '0');
+            parser->current.end++;
+
+            if (pm_char_is_octal_digit(peek(parser))) {
+                value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
+                parser->current.end++;
+
+                if (pm_char_is_octal_digit(peek(parser))) {
+                    value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
+                    parser->current.end++;
+                }
+            }
+
+            value = escape_byte(value, flags);
+            escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
+            return;
+        }
+        // Hexadecimal escape: \x followed by one or two hexadecimal digits.
+        case 'x': {
+            // Points at the byte before the 'x' (the backslash), so that the
+            // whole escape can be copied verbatim into the regexp source.
+            const uint8_t *start = parser->current.end - 1;
+
+            parser->current.end++;
+            uint8_t byte = peek(parser);
+
+            if (pm_char_is_hexadecimal_digit(byte)) {
+                uint8_t value = escape_hexadecimal_digit(byte);
+                parser->current.end++;
+
+                byte = peek(parser);
+                if (pm_char_is_hexadecimal_digit(byte)) {
+                    value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
+                    parser->current.end++;
+                }
+
+                value = escape_byte(value, flags);
+                if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                    // Under a control/meta modifier the resulting byte is
+                    // re-emitted as a \xNN escape; otherwise the original
+                    // source bytes are copied through unchanged.
+                    if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
+                        pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
+                    } else {
+                        pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+                    }
+                }
+
+                escape_write_byte_encoded(parser, buffer, value);
+            } else {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
+            }
+
+            return;
+        }
+        // Unicode escape: either \u{...} with a whitespace-separated list of
+        // codepoints, or \u followed by exactly four hexadecimal digits.
+        case 'u': {
+            // Points at the byte before the 'u' (the backslash).
+            const uint8_t *start = parser->current.end - 1;
+            parser->current.end++;
+
+            if (parser->current.end == parser->end) {
+                // The input ends right after \u. Note that this declaration
+                // shadows the outer `start`; after the increment above it
+                // refers to the same position.
+                const uint8_t *start = parser->current.end - 2;
+                PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+            } else if (peek(parser) == '{') {
+                const uint8_t *unicode_codepoints_start = parser->current.end - 2;
+                parser->current.end++;
+
+                // Skip any inline whitespace (and literal \n sequences) that
+                // follows the opening brace.
+                size_t whitespace;
+                while (true) {
+                    if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
+                        parser->current.end += whitespace;
+                    } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
+                        // This is super hacky, but it gets us nicer error
+                        // messages because we can still pass it off to the
+                        // regular expression engine even if we hit an
+                        // unterminated regular expression.
+                        parser->current.end += 2;
+                    } else {
+                        break;
+                    }
+                }
+
+                // Start of the second codepoint, recorded only for character
+                // literals (PM_ESCAPE_FLAG_SINGLE) so that the error below can
+                // point at the extra codepoints.
+                const uint8_t *extra_codepoints_start = NULL;
+                int codepoints_count = 0;
+
+                // Read codepoints until the closing brace or the end of the
+                // input, writing each one into the buffer as it is read.
+                while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
+                    const uint8_t *unicode_start = parser->current.end;
+                    size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
+
+                    if (hexadecimal_length > 6) {
+                        // \u{nnnn} character literal allows only 1-6 hexadecimal digits
+                        pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
+                    } else if (hexadecimal_length == 0) {
+                        // there are not hexadecimal characters
+
+                        if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                            // If this is a regular expression, we are going to
+                            // let the regular expression engine handle this
+                            // error instead of us.
+                            pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+                        } else {
+                            pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
+                            pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+                        }
+
+                        return;
+                    }
+
+                    parser->current.end += hexadecimal_length;
+                    codepoints_count++;
+                    if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
+                        extra_codepoints_start = unicode_start;
+                    }
+
+                    uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
+                    escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
+
+                    parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
+                }
+
+                // ?\u{nnnn} character literal should contain only one codepoint
+                // and cannot be like ?\u{nnnn mmmm}.
+                if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
+                    pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
+                }
+
+                // The loop above stops at the end of the input or at a closing
+                // brace; report an unterminated list or consume the brace.
+                if (parser->current.end == parser->end) {
+                    PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
+                } else if (peek(parser) == '}') {
+                    parser->current.end++;
+                } else {
+                    if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                        // If this is a regular expression, we are going to let
+                        // the regular expression engine handle this error
+                        // instead of us.
+                        pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+                    } else {
+                        pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+                    }
+                }
+
+                // For regular expressions the source text of the whole escape
+                // is copied through so the engine can unescape it itself.
+                if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                    pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
+                }
+            } else {
+                // \unnnn form: look at no more than four bytes.
+                size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
+
+                if (length == 0) {
+                    if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                        pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+                    } else {
+                        // Shadows the outer `start`; same position.
+                        const uint8_t *start = parser->current.end - 2;
+                        PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+                    }
+                } else if (length == 4) {
+                    uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL);
+
+                    if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                        pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
+                    }
+
+                    escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
+                    parser->current.end += 4;
+                } else {
+                    // Between one and three hexadecimal digits: too short.
+                    parser->current.end += length;
+
+                    if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                        // If this is a regular expression, we are going to let
+                        // the regular expression engine handle this error
+                        // instead of us.
+                        pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
+                    } else {
+                        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+                    }
+                }
+            }
+
+            return;
+        }
+        // Control escape in the \cx form. Every branch of the inner switch
+        // returns, so control never falls through into the next case.
+        case 'c': {
+            parser->current.end++;
+            if (flags & PM_ESCAPE_FLAG_CONTROL) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+            }
+
+            if (parser->current.end == parser->end) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                return;
+            }
+
+            uint8_t peeked = peek(parser);
+            switch (peeked) {
+                case '?': {
+                    // \c? is written as byte 0x7f (passed through escape_byte
+                    // with the incoming flags, without the control flag).
+                    parser->current.end++;
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
+                    return;
+                }
+                case '\\':
+                    parser->current.end++;
+
+                    // A unicode escape is rejected after a control prefix.
+                    if (match(parser, 'u') || match(parser, 'U')) {
+                        pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+                        return;
+                    }
+
+                    // Read the nested escape with the control flag applied.
+                    escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+                    return;
+                case ' ':
+                    parser->current.end++;
+                    escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                case '\t':
+                    parser->current.end++;
+                    // NOTE(review): the warning is built with flag 0 here but
+                    // with PM_ESCAPE_FLAG_CONTROL for the space case above;
+                    // presumably intentional, confirm before changing.
+                    escape_read_warn(parser, flags, 0, "\\t");
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                default: {
+                    if (!char_is_ascii_printable(peeked)) {
+                        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                        return;
+                    }
+
+                    parser->current.end++;
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                }
+            }
+        }
+        // Control escape in the \C-x form. Identical to \cx apart from the
+        // mandatory '-' and the span used when reporting errors. Every branch
+        // of the inner switch returns.
+        case 'C': {
+            parser->current.end++;
+            if (flags & PM_ESCAPE_FLAG_CONTROL) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+            }
+
+            if (peek(parser) != '-') {
+                // Extend the error over the offending character.
+                size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+                pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+                return;
+            }
+
+            parser->current.end++;
+            if (parser->current.end == parser->end) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                return;
+            }
+
+            uint8_t peeked = peek(parser);
+            switch (peeked) {
+                case '?': {
+                    parser->current.end++;
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
+                    return;
+                }
+                case '\\':
+                    parser->current.end++;
+
+                    if (match(parser, 'u') || match(parser, 'U')) {
+                        pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+                        return;
+                    }
+
+                    escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+                    return;
+                case ' ':
+                    parser->current.end++;
+                    escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                case '\t':
+                    parser->current.end++;
+                    escape_read_warn(parser, flags, 0, "\\t");
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                default: {
+                    if (!char_is_ascii_printable(peeked)) {
+                        size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+                        pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+                        return;
+                    }
+
+                    parser->current.end++;
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                }
+            }
+        }
+        // Meta escape in the \M-x form. Every branch of the inner switch
+        // returns. Unlike the control forms there is no special case for '?'.
+        case 'M': {
+            parser->current.end++;
+            if (flags & PM_ESCAPE_FLAG_META) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
+            }
+
+            if (peek(parser) != '-') {
+                size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+                pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+                return;
+            }
+
+            parser->current.end++;
+            if (parser->current.end == parser->end) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+                return;
+            }
+
+            uint8_t peeked = peek(parser);
+            switch (peeked) {
+                case '\\':
+                    parser->current.end++;
+
+                    if (match(parser, 'u') || match(parser, 'U')) {
+                        pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+                        return;
+                    }
+
+                    // Read the nested escape with the meta flag applied.
+                    escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
+                    return;
+                case ' ':
+                    parser->current.end++;
+                    escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+                    return;
+                case '\t':
+                    parser->current.end++;
+                    // The control flag is masked out of the flags used to
+                    // build the warning text only; the byte that is written
+                    // still uses the unmasked flags.
+                    escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+                    return;
+                default:
+                    if (!char_is_ascii_printable(peeked)) {
+                        size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+                        pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+                        return;
+                    }
+
+                    parser->current.end++;
+                    escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+                    return;
+            }
+        }
+        // A backslash followed by \r\n consumes both bytes and writes a
+        // newline to the string buffer only (nothing is written to the
+        // regular expression buffer). A lone \r is handled by the default
+        // case below.
+        case '\r': {
+            if (peek_offset(parser, 1) == '\n') {
+                parser->current.end += 2;
+                escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
+                return;
+            }
+            PRISM_FALLTHROUGH
+        }
+        // Anything else: under a control/meta modifier only printable ASCII
+        // is accepted; otherwise the character itself is written.
+        default: {
+            if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
+                size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+                pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+                return;
+            }
+            if (parser->current.end < parser->end) {
+                escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
+            } else {
+                // The backslash was the last byte of the input.
+                pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
+            }
+            return;
+        }
+    }
+}
+
+/**
+ * Lex whatever follows a ? character: either the ? operator (as used by the
+ * ternary) or a character literal. The escape sequences accepted inside a
+ * character literal are listed below.
+ *
+ * \\a            bell, ASCII 07h (BEL)
+ * \\b            backspace, ASCII 08h (BS)
+ * \t             horizontal tab, ASCII 09h (TAB)
+ * \\n            newline (line feed), ASCII 0Ah (LF)
+ * \v             vertical tab, ASCII 0Bh (VT)
+ * \f             form feed, ASCII 0Ch (FF)
+ * \r             carriage return, ASCII 0Dh (CR)
+ * \\e            escape, ASCII 1Bh (ESC)
+ * \s             space, ASCII 20h (SPC)
+ * \\             backslash
+ * \nnn           octal bit pattern, where nnn is 1-3 octal digits ([0-7])
+ * \xnn           hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
+ * \unnnn         Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
+ * \u{nnnn ...}   Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
+ * \cx or \C-x    control character, where x is an ASCII printable character
+ * \M-x           meta character, where x is an ASCII printable character
+ * \M-\C-x        meta control character, where x is an ASCII printable character
+ * \M-\cx         same as above
+ * \\c\M-x        same as above
+ * \\c? or \C-?   delete, ASCII 7Fh (DEL)
+ */
+static pm_token_type_t
+lex_question_mark(pm_parser_t *parser) {
+    // At the end of an expression a ? can only be the operator.
+    if (lex_state_end_p(parser)) {
+        lex_state_set(parser, PM_LEX_STATE_BEG);
+        return PM_TOKEN_QUESTION_MARK;
+    }
+
+    // A ? as the very last byte of the input is an incomplete literal.
+    if (parser->current.end >= parser->end) {
+        pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
+        pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
+        return PM_TOKEN_CHARACTER_LITERAL;
+    }
+
+    // Every remaining path starts out in the BEG state.
+    lex_state_set(parser, PM_LEX_STATE_BEG);
+
+    // A ? followed by whitespace is the operator.
+    if (pm_char_is_whitespace(*parser->current.end)) {
+        return PM_TOKEN_QUESTION_MARK;
+    }
+
+    // A backslash introduces an escaped character literal. The unescaped
+    // bytes are collected in a buffer whose storage is handed over to the
+    // parser's current string.
+    if (match(parser, '\\')) {
+        lex_state_set(parser, PM_LEX_STATE_END);
+
+        pm_buffer_t unescaped;
+        pm_buffer_init_capacity(&unescaped, 3);
+
+        escape_read(parser, &unescaped, NULL, PM_ESCAPE_FLAG_SINGLE);
+        pm_string_owned_init(&parser->current_string, (uint8_t *) unescaped.value, unescaped.length);
+
+        return PM_TOKEN_CHARACTER_LITERAL;
+    }
+
+    const size_t char_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+    const uint8_t *after_char = parser->current.end + char_width;
+
+    // Ternary operators can have a ? immediately followed by an identifier,
+    // including one which starts with an underscore. If the next character
+    // could begin such an identifier and is itself followed by another
+    // identifier character, this is the operator and not a literal.
+    const bool identifier_like = parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_';
+
+    if (identifier_like && (after_char < parser->end) && char_is_identifier(parser, after_char, parser->end - after_char)) {
+        return PM_TOKEN_QUESTION_MARK;
+    }
+
+    // Otherwise the single character after the ? is the literal's content.
+    lex_state_set(parser, PM_LEX_STATE_END);
+    parser->current.end += char_width;
+    pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
+    return PM_TOKEN_CHARACTER_LITERAL;
+}
+
+/**
+ * Lex a variable introduced by an @ sign. A second @ makes it a class
+ * variable, otherwise it is an instance variable. An error is reported when
+ * the name starts with a decimal digit or when no name follows at all; the
+ * token type is returned either way.
+ */
+static pm_token_type_t
+lex_at_variable(pm_parser_t *parser) {
+    const bool is_class_variable = match(parser, '@');
+    const pm_token_type_t type = is_class_variable ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
+    const uint8_t *end = parser->end;
+
+    size_t width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end);
+
+    if (width > 0) {
+        // Consume the first character and then every identifier character
+        // that follows it.
+        do {
+            parser->current.end += width;
+            width = char_is_identifier(parser, parser->current.end, end - parser->current.end);
+        } while (width > 0);
+    } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
+        // Names may not begin with a digit. The diagnostic that is used
+        // depends on the version being parsed.
+        pm_diagnostic_id_t diag_id;
+        if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
+            diag_id = is_class_variable ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
+        } else {
+            diag_id = is_class_variable ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
+        }
+
+        // Include the offending character in the text shown by the message.
+        width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
+    } else {
+        // Nothing usable follows the @ sign(s).
+        pm_parser_err_token(parser, &parser->current, is_class_variable ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE);
+    }
+
+    // If we're lexing an embedded variable, then we need to pop back into the
+    // parent lex context.
+    if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
+        lex_mode_pop(parser);
+    }
+
+    return type;
+}
+
+/**
+ * Invoke the lex callback with the current token, if a callback has been
+ * registered on the parser. Does nothing otherwise.
+ */
+static inline void
+parser_lex_callback(pm_parser_t *parser) {
+    if (!parser->lex_callback) return;
+
+    parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
+}
+
+/**
+ * Allocate a new comment of the specified type whose location spans the
+ * current token. Returns NULL if the allocation fails.
+ */
+static inline pm_comment_t *
+parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
+    pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
+
+    if (comment != NULL) {
+        *comment = (pm_comment_t) {
+            .type = type,
+            .location = { parser->current.start, parser->current.end }
+        };
+    }
+
+    return comment;
+}
+
+/**
+ * Advance the end of the current token to just past the next newline,
+ * recording that newline in the parser's newline list. If there is no further
+ * newline, advance to the end of the input instead.
+ */
+static inline void
+lex_embdoc_consume_line(pm_parser_t *parser) {
+    const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+
+    if (newline == NULL) {
+        parser->current.end = parser->end;
+    } else {
+        pm_newline_list_append(&parser->newline_list, newline);
+        parser->current.end = newline + 1;
+    }
+}
+
+/**
+ * Lex out embedded documentation, and return when we have either hit the end of
+ * the file or the end of the embedded documentation. This calls the callback
+ * manually because only the lexer should see these tokens, not the parser.
+ *
+ * Returns PM_TOKEN_EMBDOC_END when a terminating =end line was found. Returns
+ * PM_TOKEN_EOF when the input ran out first (an error is reported) or when the
+ * comment could not be allocated. Whenever a comment was allocated it is
+ * appended to the parser's comment list before returning.
+ */
+static pm_token_type_t
+lex_embdoc(pm_parser_t *parser) {
+    // First, lex out the EMBDOC_BEGIN token, which runs to the end of the
+    // line.
+    lex_embdoc_consume_line(parser);
+    parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
+    parser_lex_callback(parser);
+
+    // Now, create a comment that is going to be attached to the parser.
+    pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
+    if (comment == NULL) return PM_TOKEN_EOF;
+
+    // Now, loop until we find the end of the embedded documentation or the end
+    // of the file. The remaining length is compared rather than computing
+    // `current.end + 4`, because forming a pointer more than one past the end
+    // of the input is undefined behavior.
+    while ((parser->end - parser->current.end) >= 4) {
+        parser->current.start = parser->current.end;
+
+        // If we've hit the end of the embedded documentation then we'll return
+        // that token here. The =end marker must be followed by the end of the
+        // input, whitespace, or one of the end-of-script bytes.
+        if (
+            (memcmp(parser->current.end, "=end", 4) == 0) &&
+            (
+                (parser->current.end + 4 == parser->end) || // end of file
+                pm_char_is_whitespace(parser->current.end[4]) || // whitespace
+                (parser->current.end[4] == '\0') || // NUL or end of script
+                (parser->current.end[4] == '\004') || // ^D
+                (parser->current.end[4] == '\032') // ^Z
+            )
+        ) {
+            lex_embdoc_consume_line(parser);
+            parser->current.type = PM_TOKEN_EMBDOC_END;
+            parser_lex_callback(parser);
+
+            comment->location.end = parser->current.end;
+            pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
+
+            return PM_TOKEN_EMBDOC_END;
+        }
+
+        // Otherwise, we'll parse until the end of the line and return a line of
+        // embedded documentation.
+        lex_embdoc_consume_line(parser);
+        parser->current.type = PM_TOKEN_EMBDOC_LINE;
+        parser_lex_callback(parser);
+    }
+
+    // Fewer than four bytes remain, so there is no room left for a
+    // terminator.
+    pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
+
+    comment->location.end = parser->current.end;
+    pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
+
+    return PM_TOKEN_EOF;
+}
+
+/**
+ * Mark the current token as an ignored newline and then call the lex callback
+ * (if any) so that it still sees the token. This happens in a couple of places
+ * depending on whether or not we have already lexed a comment.
+ */
+static inline void
+parser_lex_ignored_newline(pm_parser_t *parser) {
+    parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
+    parser_lex_callback(parser);
+}
+
+/**
+ * This function is called when a newline is encountered. At some newlines
+ * there are one or more heredocs whose bodies have already been lexed and
+ * which now need to be skipped past. That is indicated by the heredoc_end
+ * field on the parser.
+ *
+ * This function makes the skip happen: it stores heredoc_end into the
+ * parser's next_start field and then clears heredoc_end. It does not check
+ * whether heredoc_end is set.
+ */
+static inline void
+parser_flush_heredoc_end(pm_parser_t *parser) {
+    // The recorded end of the heredoc must lie within the input.
+    assert(parser->heredoc_end <= parser->end);
+    parser->next_start = parser->heredoc_end;
+    parser->heredoc_end = NULL;
+}
+
+/**
+ * Returns true if the parser has lexed the last token on the current line,
+ * i.e. if only inline whitespace separates the end of the current token from
+ * the next newline, the next # character, or the end of the input.
+ */
+static bool
+parser_end_of_line_p(const pm_parser_t *parser) {
+    for (const uint8_t *scan = parser->current.end; scan < parser->end; scan++) {
+        const uint8_t byte = *scan;
+
+        // A newline or the start of a comment ends the search.
+        if (byte == '\n' || byte == '#') break;
+
+        if (!pm_char_is_inline_whitespace(byte)) return false;
+    }
+
+    return true;
+}
+
+/**
+ * When we're lexing certain types (strings, symbols, lists, etc.) we have
+ * string content associated with the tokens. For example:
+ *
+ *     "foo"
+ *
+ * In this case, the string content is foo. Since there is no escaping, there's
+ * no need to track additional information and the token can be returned as
+ * normal. However, if we have escape sequences:
+ *
+ *     "foo\n"
+ *
+ * then the bytes in the string are "f", "o", "o", "\", "n", but we want to
+ * provide our consumers with the string content "f", "o", "o", "\n". In these
+ * cases, when we find the first escape sequence, we initialize a pm_buffer_t
+ * to keep track of the string content. Then in the parser, it will
+ * automatically attach the string content to the node that it belongs to.
+ */
+typedef struct {
+    /**
+     * The buffer that we're using to keep track of the string content. It will
+     * only be initialized if we receive an escape sequence.
+     */
+    pm_buffer_t buffer;
+
+    /**
+     * The cursor into the source string that points to how far we have
+     * currently copied into the buffer. The escape and flush functions treat
+     * a NULL cursor as "no escape sequence has been found yet", in which case
+     * the buffer has not been initialized.
+     */
+    const uint8_t *cursor;
+} pm_token_buffer_t;
+
+/**
+ * In order to properly set a regular expression's encoding and to validate
+ * the byte sequence for the underlying encoding we must process any escape
+ * sequences. The unescaped byte sequence will be stored in `base.buffer` just
+ * like for other string-like types. However, we also need to store the
+ * regular expression's source string. That string may be different from what
+ * we see during lexing because some escape sequences rewrite the source.
+ *
+ * The `regexp_buffer` field is only initialized for regular expressions and
+ * only once we receive an escape sequence. It holds the byte sequence of the
+ * regular expression's source string.
+ */
+typedef struct {
+    /** The embedded base token buffer (unescaped content and copy cursor). */
+    pm_token_buffer_t base;
+
+    /** The buffer holding the regexp source. */
+    pm_buffer_t regexp_buffer;
+} pm_regexp_token_buffer_t;
+
+/**
+ * Push the given byte onto the end of the token buffer's string content.
+ */
+static inline void
+pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
+    pm_buffer_append_byte(&token_buffer->buffer, byte);
+}
+
+/**
+ * Push the given byte onto the end of the regular expression source buffer.
+ * Note that this only appends to regexp_buffer; the base token buffer is left
+ * untouched.
+ */
+static inline void
+pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
+    pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
+}
+
+/**
+ * Return the width in bytes of the character that starts at the end of the
+ * current token. The parser's own encoding is consulted if the encoding has
+ * been changed, otherwise the character is measured as UTF-8. The result is
+ * never zero.
+ */
+static inline size_t
+parser_char_width(const pm_parser_t *parser) {
+    const uint8_t *cursor = parser->current.end;
+    ptrdiff_t remaining = parser->end - cursor;
+
+    size_t width = parser->encoding_changed
+        ? parser->encoding->char_width(cursor, remaining)
+        : pm_encoding_utf_8_char_width(cursor, remaining);
+
+    // TODO: If the character is invalid in the given encoding, then we'll just
+    // push one byte into the buffer. This should actually be an error.
+    if (width == 0) return 1;
+
+    return width;
+}
+
+/**
+ * Push the character found at the end of the current token into the token
+ * buffer, and then move the end of the current token past that character.
+ */
+static void
+pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
+    const uint8_t *character = parser->current.end;
+    const size_t length = parser_char_width(parser);
+
+    pm_buffer_append_bytes(&token_buffer->buffer, character, length);
+    parser->current.end = character + length;
+}
+
+/**
+ * Push the character found at the end of the current token into both buffers
+ * of the regular expression token buffer (the string content and the regexp
+ * source), and then move the end of the current token past that character.
+ */
+static void
+pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
+    const uint8_t *character = parser->current.end;
+    const size_t length = parser_char_width(parser);
+
+    pm_buffer_append_bytes(&token_buffer->base.buffer, character, length);
+    pm_buffer_append_bytes(&token_buffer->regexp_buffer, character, length);
+    parser->current.end = character + length;
+}
+
+/**
+ * Returns true if none of the `length` bytes starting at `value` has its high
+ * bit set, i.e. every byte is in the ASCII range. An empty slice is considered
+ * ASCII-only.
+ */
+static bool
+pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
+    for (; length > 0; length--, value++) {
+        if (*value >= 0x80) return false;
+    }
+
+    return true;
+}
+
+/**
+ * Called when we're about to return from lexing the current token and we know
+ * for sure that an escape sequence was found. The contents of the token buffer
+ * become the parser's current string so that they can be attached to the
+ * correct node.
+ *
+ * NOTE(review): the string is set up with pm_string_owned_init and the buffer
+ * is not freed here, so ownership of the buffer's memory presumably moves to
+ * the string -- confirm against pm_string_owned_init.
+ */
+static inline void
+pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+    pm_buffer_t *content = &token_buffer->buffer;
+
+    pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(content), pm_buffer_length(content));
+}
+
+/**
+ * The regular expression counterpart of pm_token_buffer_copy. The contents of
+ * the base buffer become the parser's current string, the regexp source buffer
+ * is scanned to record whether the regular expression is ASCII-only, and the
+ * regexp source buffer is then freed.
+ */
+static inline void
+pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
+    pm_buffer_t *content = &token_buffer->base.buffer;
+    pm_buffer_t *source = &token_buffer->regexp_buffer;
+
+    pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(content), pm_buffer_length(content));
+
+    // The source bytes are only needed for the ASCII-only check, so the buffer
+    // holding them is released as soon as the check is done.
+    const uint8_t *source_bytes = (const uint8_t *) pm_buffer_value(source);
+    parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(source_bytes, pm_buffer_length(source));
+    pm_buffer_free(source);
+}
+
+/**
+ * Called when we're about to return from lexing the current token in order to
+ * establish parser->current_string so that it can be attached to a node.
+ *
+ * If the token buffer's cursor is NULL then no escape sequence was ever found,
+ * and the current string directly references the bounds of the current token.
+ * Otherwise the source bytes between the cursor and the end of the current
+ * token are appended to the buffer, and the buffer's contents become the
+ * current string.
+ */
+static void
+pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+    const uint8_t *copied = token_buffer->cursor;
+
+    if (copied == NULL) {
+        pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+        return;
+    }
+
+    size_t pending = (size_t) (parser->current.end - copied);
+    pm_buffer_append_bytes(&token_buffer->buffer, copied, pending);
+    pm_token_buffer_copy(parser, token_buffer);
+}
+
+/**
+ * The regular expression counterpart of pm_token_buffer_flush. In addition to
+ * establishing parser->current_string, this records on the parser whether the
+ * regular expression is ASCII-only.
+ *
+ * If the base cursor is NULL then no escape sequence was ever found, so both
+ * the current string and the ASCII-only check use the bounds of the current
+ * token. Otherwise the source bytes between the cursor and the end of the
+ * current token are appended to both buffers before they are copied out.
+ */
+static void
+pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
+    const uint8_t *copied = token_buffer->base.cursor;
+
+    if (copied == NULL) {
+        const uint8_t *start = parser->current.start;
+        const uint8_t *end = parser->current.end;
+
+        pm_string_shared_init(&parser->current_string, start, end);
+        parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(start, (size_t) (end - start));
+        return;
+    }
+
+    size_t pending = (size_t) (parser->current.end - copied);
+    pm_buffer_append_bytes(&token_buffer->base.buffer, copied, pending);
+    pm_buffer_append_bytes(&token_buffer->regexp_buffer, copied, pending);
+    pm_regexp_token_buffer_copy(parser, token_buffer);
+}
+
+#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
+
+/**
+ * Called when an escape sequence has been found. From this point on the string
+ * content differs from a direct slice of the source, so everything that has
+ * not been copied yet, up to (but not including) the backslash, is copied into
+ * the buffer. The buffer is initialized first if this is the first escape
+ * sequence of the token.
+ *
+ * It is expected that the parser's current token end will be pointing at one
+ * byte past the backslash that starts the escape sequence.
+ */
+static void
+pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+    const uint8_t *start = token_buffer->cursor;
+
+    if (start == NULL) {
+        // First escape sequence in this token: set up the buffer and copy
+        // from the very start of the token.
+        pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+        start = parser->current.start;
+    }
+
+    const uint8_t *backslash = parser->current.end - 1;
+    assert(backslash >= start);
+
+    pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (backslash - start));
+    token_buffer->cursor = backslash;
+}
+
+/**
+ * The regular expression counterpart of pm_token_buffer_escape. When an escape
+ * sequence has been found, everything that has not been copied yet, up to (but
+ * not including) the backslash, is copied into both the base buffer and the
+ * regexp source buffer. Both buffers are initialized first if this is the
+ * first escape sequence of the token.
+ *
+ * It is expected that the parser's current token end will be pointing at one
+ * byte past the backslash that starts the escape sequence.
+ */
+static void
+pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
+    const uint8_t *start;
+    if (token_buffer->base.cursor == NULL) {
+        pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+        pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+        start = parser->current.start;
+    } else {
+        start = token_buffer->base.cursor;
+    }
+
+    const uint8_t *end = parser->current.end - 1;
+
+    // Same invariant as pm_token_buffer_escape: if the backslash were before
+    // the copy start, the cast below would wrap around to a huge length.
+    assert(end >= start);
+
+    size_t length = (size_t) (end - start);
+    pm_buffer_append_bytes(&token_buffer->base.buffer, start, length);
+    pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, length);
+
+    token_buffer->base.cursor = end;
+}
+
+#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
+
+/**
+ * Effectively the same thing as pm_strspn_inline_whitespace, but aware of the
+ * heredoc's indentation mode:
+ *
+ * - PM_HEREDOC_INDENT_NONE: the cursor is left alone, since a terminator can't
+ *   be matched with indentation and there is no common whitespace to compute.
+ * - PM_HEREDOC_INDENT_DASH: the cursor is moved past inline whitespace.
+ * - PM_HEREDOC_INDENT_TILDE: the cursor is moved past inline whitespace and
+ *   the width of that whitespace is computed, expanding tab characters out to
+ *   the nearest tab boundaries.
+ *
+ * The computed width is returned for tilde heredocs; 0 is returned otherwise.
+ */
+static inline size_t
+pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
+    if (indent == PM_HEREDOC_INDENT_DASH) {
+        // Skip past inline whitespace; no width needs to be calculated.
+        *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
+        return 0;
+    }
+
+    if (indent != PM_HEREDOC_INDENT_TILDE) {
+        return 0;
+    }
+
+    // Skip past inline whitespace and calculate common whitespace.
+    size_t width = 0;
+    const uint8_t *position = *cursor;
+
+    for (; position < parser->end && pm_char_is_inline_whitespace(*position); position++) {
+        if (*position == '\t') {
+            width = (width / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
+        } else {
+            width++;
+        }
+    }
+
+    *cursor = position;
+    return width;
+}
+
+/**
+ * Lex past the delimiter of a percent literal and return it. Newlines and
+ * heredocs are handled appropriately when the delimiter is a line ending: the
+ * whole line ending is consumed, and for \r\n the returned delimiter is \n.
+ */
+static uint8_t
+pm_lex_percent_delimiter(pm_parser_t *parser) {
+    size_t eol_length = match_eol(parser);
+
+    // The delimiter is not a line ending: consume and return a single byte.
+    if (eol_length == 0) {
+        return *parser->current.end++;
+    }
+
+    if (parser->heredoc_end != NULL) {
+        // If we have already lexed a heredoc, then the newline has already
+        // been added to the list. In this case we want to just flush the
+        // heredoc end.
+        parser_flush_heredoc_end(parser);
+    } else {
+        // Otherwise, we'll add the newline to the list of newlines.
+        pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
+    }
+
+    // If our delimiter is \r\n, we want to treat it as if it's \n.
+    // For example, %\r\nfoo\r\n should be "foo"
+    uint8_t delimiter = (eol_length == 2) ? parser->current.end[1] : parser->current.end[0];
+
+    parser->current.end += eol_length;
+    return delimiter;
+}
+
+/**
+ * This is a convenience macro that will set the current token type, call the
+ * lex callback, and then return from the parser_lex function.
+ *
+ * It relies on a variable named `parser` being in scope. Note that it expands
+ * to several statements ending in a bare `return`, so it must only be used
+ * where a sequence of statements is valid (inside braces or directly under a
+ * case label), never as the sole body of an unbraced `if`/`else`/loop.
+ */
+#define LEX(token_type) parser->current.type = (token_type); parser_lex_callback(parser); return
+
+/**
+ * Called when the parser requires a new token. The parser maintains a moving
+ * window of two tokens at a time: parser.previous and parser.current. This
+ * function will move the current token into the previous token and then
+ * lex a new token into the current token.
+ */
+static void
+parser_lex(pm_parser_t *parser) {
+    assert(parser->current.end <= parser->end);
+    parser->previous = parser->current;
+
+    // This value mirrors cmd_state from CRuby.
+    bool previous_command_start = parser->command_start;
+    parser->command_start = false;
+
+    // This is used to communicate to the newline lexing function that we've
+    // already seen a comment.
+    bool lexed_comment = false;
+
+    // Here we cache the current value of the semantic token seen flag. This is
+    // used to reset it in case we find a token that shouldn't flip this flag.
+    unsigned int semantic_token_seen = parser->semantic_token_seen;
+    parser->semantic_token_seen = true;
+
+    switch (parser->lex_modes.current->mode) {
+        case PM_LEX_DEFAULT:
+        case PM_LEX_EMBEXPR:
+        case PM_LEX_EMBVAR:
+
+        // We have a specific named label here because we are going to jump back to
+        // this location in the event that we have lexed a token that should not be
+        // returned to the parser. This includes comments, ignored newlines, and
+        // invalid tokens of some form.
+        lex_next_token: {
+            // If we have the special next_start pointer set, then we're going to jump
+            // to that location and start lexing from there.
+            if (parser->next_start != NULL) {
+                parser->current.end = parser->next_start;
+                parser->next_start = NULL;
+            }
+
+            // This value mirrors space_seen from CRuby. It tracks whether or not
+            // space has been eaten before the start of the next token.
+            bool space_seen = false;
+
+            // First, we're going to skip past any whitespace at the front of the next
+            // token.
+            bool chomping = true;
+            while (parser->current.end < parser->end && chomping) {
+                switch (*parser->current.end) {
+                    case ' ':
+                    case '\t':
+                    case '\f':
+                    case '\v':
+                        parser->current.end++;
+                        space_seen = true;
+                        break;
+                    case '\r':
+                        if (match_eol_offset(parser, 1)) {
+                            chomping = false;
+                        } else {
+                            pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
+                            parser->current.end++;
+                            space_seen = true;
+                        }
+                        break;
+                    case '\\': {
+                        size_t eol_length = match_eol_offset(parser, 1);
+                        if (eol_length) {
+                            if (parser->heredoc_end) {
+                                parser->current.end = parser->heredoc_end;
+                                parser->heredoc_end = NULL;
+                            } else {
+                                parser->current.end += eol_length + 1;
+                                pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+                                space_seen = true;
+                            }
+                        } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
+                            parser->current.end += 2;
+                        } else {
+                            chomping = false;
+                        }
+
+                        break;
+                    }
+                    default:
+                        chomping = false;
+                        break;
+                }
+            }
+
+            // Next, we'll set the start of this token to be the current end.
+            parser->current.start = parser->current.end;
+
+            // We'll check if we're at the end of the file. If we are, then we
+            // need to return the EOF token.
+            if (parser->current.end >= parser->end) {
+                // If we hit EOF, but the EOF came immediately after a newline,
+                // set the start of the token to the newline.  This way any EOF
+                // errors will be reported as happening on that line rather than
+                // a line after.  For example "foo(\n" should report an error
+                // on line 1 even though EOF technically occurs on line 2.
+                if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
+                    parser->current.start -= 1;
+                }
+                LEX(PM_TOKEN_EOF);
+            }
+
+            // Finally, we'll check the current character to determine the next
+            // token.
+            switch (*parser->current.end++) {
+                case '\0':   // NUL or end of script
+                case '\004': // ^D
+                case '\032': // ^Z
+                    parser->current.end--;
+                    LEX(PM_TOKEN_EOF);
+
+                case '#': { // comments
+                    const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
+                    parser->current.end = ending == NULL ? parser->end : ending;
+
+                    // If we found a comment while lexing, then we're going to
+                    // add it to the list of comments in the file and keep
+                    // lexing.
+                    pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
+                    pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
+
+                    if (ending) parser->current.end++;
+                    parser->current.type = PM_TOKEN_COMMENT;
+                    parser_lex_callback(parser);
+
+                    // Here, parse the comment to see if it's a magic comment
+                    // and potentially change state on the parser.
+                    if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
+                        ptrdiff_t length = parser->current.end - parser->current.start;
+
+                        // If we didn't find a magic comment within the first
+                        // pass and we're at the start of the file, then we need
+                        // to do another pass to potentially find other patterns
+                        // for encoding comments.
+                        if (length >= 10 && !parser->encoding_locked) {
+                            parser_lex_magic_comment_encoding(parser);
+                        }
+                    }
+
+                    lexed_comment = true;
+                }
+                PRISM_FALLTHROUGH
+                case '\r':
+                case '\n': {
+                    parser->semantic_token_seen = semantic_token_seen & 0x1;
+                    size_t eol_length = match_eol_at(parser, parser->current.end - 1);
+
+                    if (eol_length) {
+                        // The only way you can have carriage returns in this
+                        // particular loop is if you have a carriage return
+                        // followed by a newline. In that case we'll just skip
+                        // over the carriage return and continue lexing, in
+                        // order to make it so that the newline token
+                        // encapsulates both the carriage return and the
+                        // newline. Note that we need to check that we haven't
+                        // already lexed a comment here because that falls
+                        // through into here as well.
+                        if (!lexed_comment) {
+                            parser->current.end += eol_length - 1; // skip CR
+                        }
+
+                        if (parser->heredoc_end == NULL) {
+                            pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+                        }
+                    }
+
+                    if (parser->heredoc_end) {
+                        parser_flush_heredoc_end(parser);
+                    }
+
+                    // If this is an ignored newline, then we can continue lexing after
+                    // calling the callback with the ignored newline token.
+                    switch (lex_state_ignored_p(parser)) {
+                        case PM_IGNORED_NEWLINE_NONE:
+                            break;
+                        case PM_IGNORED_NEWLINE_PATTERN:
+                            if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->command_start = true;
+                                parser->current.type = PM_TOKEN_NEWLINE;
+                                return;
+                            }
+                            PRISM_FALLTHROUGH
+                        case PM_IGNORED_NEWLINE_ALL:
+                            if (!lexed_comment) parser_lex_ignored_newline(parser);
+                            lexed_comment = false;
+                            goto lex_next_token;
+                    }
+
+                    // Here we need to look ahead and see if there is a call operator
+                    // (either . or &.) that starts the next line. If there is, then this
+                    // is going to become an ignored newline and we're going to instead
+                    // return the call operator.
+                    const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
+                    next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
+
+                    if (next_content < parser->end) {
+                        // If we hit a comment after a newline, then we're going to check
+                        // if it's ignored or if it's followed by a method call ('.').
+                        // If it is, then we're going to call the
+                        // callback with an ignored newline and then continue lexing.
+                        // Otherwise we'll return a regular newline.
+                        if (next_content[0] == '#') {
+                            // Here we look for a "." or "&." following a "\n".
+                            const uint8_t *following = next_newline(next_content, parser->end - next_content);
+
+                            while (following && (following + 1 < parser->end)) {
+                                following++;
+                                following += pm_strspn_inline_whitespace(following, parser->end - following);
+
+                                // If this is not followed by a comment, then we can break out
+                                // of this loop.
+                                if (peek_at(parser, following) != '#') break;
+
+                                // If there is a comment, then we need to find the end of the
+                                // comment and continue searching from there.
+                                following = next_newline(following, parser->end - following);
+                            }
+
+                            // If the lex state was ignored, we will lex the
+                            // ignored newline.
+                            if (lex_state_ignored_p(parser)) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+
+                            // If we hit a '.' or a '&.' we will lex the ignored
+                            // newline.
+                            if (following && (
+                                (peek_at(parser, following) == '.') ||
+                                (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
+                            )) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+
+
+                            // If we are parsing as CRuby 4.0 or later and we
+                            // hit a '&&' or a '||' then we will lex the ignored
+                            // newline.
+                            if (
+                                (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
+                                following && (
+                                    (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
+                                    (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
+                                    (peek_at(parser, following) == 'a' && peek_at(parser, following + 1) == 'n' && peek_at(parser, following + 2) == 'd' && !char_is_identifier(parser, following + 3, parser->end - (following + 3))) ||
+                                    (peek_at(parser, following) == 'o' && peek_at(parser, following + 1) == 'r' && !char_is_identifier(parser, following + 2, parser->end - (following + 2)))
+                                )
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lexed_comment = false;
+                                goto lex_next_token;
+                            }
+                        }
+
+                        // If we hit a . after a newline, then we're in a call chain and
+                        // we need to return the call operator.
+                        if (next_content[0] == '.') {
+                            // To match ripper, we need to emit an ignored newline even though
+                            // it's a real newline in the case that we have a beginless range
+                            // on a subsequent line.
+                            if (peek_at(parser, next_content + 1) == '.') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->command_start = true;
+                                parser->current.type = PM_TOKEN_NEWLINE;
+                                return;
+                            }
+
+                            if (!lexed_comment) parser_lex_ignored_newline(parser);
+                            lex_state_set(parser, PM_LEX_STATE_DOT);
+                            parser->current.start = next_content;
+                            parser->current.end = next_content + 1;
+                            parser->next_start = NULL;
+                            LEX(PM_TOKEN_DOT);
+                        }
+
+                        // If we hit a &. after a newline, then we're in a call chain and
+                        // we need to return the call operator.
+                        if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
+                            if (!lexed_comment) parser_lex_ignored_newline(parser);
+                            lex_state_set(parser, PM_LEX_STATE_DOT);
+                            parser->current.start = next_content;
+                            parser->current.end = next_content + 2;
+                            parser->next_start = NULL;
+                            LEX(PM_TOKEN_AMPERSAND_DOT);
+                        }
+
+                        if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+                            // If we hit an && then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+                            }
+
+                            // If we hit a || then we are in a logical chain and
+                            // we need to return the logical operator.
+                            if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                LEX(PM_TOKEN_PIPE_PIPE);
+                            }
+
+                            // If we hit an 'and' then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (
+                                peek_at(parser, next_content) == 'a' &&
+                                peek_at(parser, next_content + 1) == 'n' &&
+                                peek_at(parser, next_content + 2) == 'd' &&
+                                !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 3;
+                                parser->next_start = NULL;
+                                parser->command_start = true;
+                                LEX(PM_TOKEN_KEYWORD_AND);
+                            }
+
+                            // If we hit a 'or' then we are in a logical chain
+                            // and we need to return the logical operator.
+                            if (
+                                peek_at(parser, next_content) == 'o' &&
+                                peek_at(parser, next_content + 1) == 'r' &&
+                                !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
+                            ) {
+                                if (!lexed_comment) parser_lex_ignored_newline(parser);
+                                lex_state_set(parser, PM_LEX_STATE_BEG);
+                                parser->current.start = next_content;
+                                parser->current.end = next_content + 2;
+                                parser->next_start = NULL;
+                                parser->command_start = true;
+                                LEX(PM_TOKEN_KEYWORD_OR);
+                            }
+                        }
+                    }
+
+                    // At this point we know this is a regular newline, and we can set the
+                    // necessary state and return the token.
+                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                    parser->command_start = true;
+                    parser->current.type = PM_TOKEN_NEWLINE;
+                    if (!lexed_comment) parser_lex_callback(parser);
+                    return;
+                }
+
+                // ,
+                case ',':
+                    if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    LEX(PM_TOKEN_COMMA);
+
+                // (
+                case '(': {
+                    pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
+
+                    if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
+                        type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
+                    }
+
+                    parser->enclosure_nesting++;
+                    lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    pm_do_loop_stack_push(parser, false);
+                    LEX(type);
+                }
+
+                // )
+                case ')':
+                    parser->enclosure_nesting--;
+                    lex_state_set(parser, PM_LEX_STATE_ENDFN);
+                    pm_do_loop_stack_pop(parser);
+                    LEX(PM_TOKEN_PARENTHESIS_RIGHT);
+
+                // ;
+                case ';':
+                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                    parser->command_start = true;
+                    LEX(PM_TOKEN_SEMICOLON);
+
+                // [ [] []=
+                case '[':
+                    parser->enclosure_nesting++;
+                    pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
+
+                    if (lex_state_operator_p(parser)) {
+                        if (match(parser, ']')) {
+                            parser->enclosure_nesting--;
+                            lex_state_set(parser, PM_LEX_STATE_ARG);
+                            LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
+                        }
+
+                        lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
+                        LEX(type);
+                    }
+
+                    if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
+                        type = PM_TOKEN_BRACKET_LEFT_ARRAY;
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    pm_do_loop_stack_push(parser, false);
+                    LEX(type);
+
+                // ]
+                case ']':
+                    parser->enclosure_nesting--;
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    pm_do_loop_stack_pop(parser);
+                    LEX(PM_TOKEN_BRACKET_RIGHT);
+
+                // {
+                case '{': {
+                    pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
+
+                    if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
+                        // This { begins a lambda
+                        parser->command_start = true;
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        type = PM_TOKEN_LAMBDA_BEGIN;
+                    } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
+                        // This { begins a hash literal
+                        lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
+                        // This { begins a block
+                        parser->command_start = true;
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
+                        // This { begins a block on a command
+                        parser->command_start = true;
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    } else {
+                        // This { begins a hash literal
+                        lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    }
+
+                    parser->enclosure_nesting++;
+                    parser->brace_nesting++;
+                    pm_do_loop_stack_push(parser, false);
+
+                    LEX(type);
+                }
+
+                // } (closes either a brace pair or an embedded expression)
+                case '}':
+                    parser->enclosure_nesting--;
+                    pm_do_loop_stack_pop(parser);
+
+                    if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) { // no '{' left open in this EMBEXPR mode
+                        lex_mode_pop(parser); // leave the embedded-expression lex mode
+                        LEX(PM_TOKEN_EMBEXPR_END);
+                    }
+
+                    parser->brace_nesting--; // only reached for an ordinary closing brace
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    LEX(PM_TOKEN_BRACE_RIGHT);
+
+                // * ** **= *=
+                case '*': {
+                    if (match(parser, '*')) {
+                        if (match(parser, '=')) {
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                            LEX(PM_TOKEN_STAR_STAR_EQUAL);
+                        }
+
+                        pm_token_type_t type = PM_TOKEN_STAR_STAR;
+
+                        if (lex_state_spcarg_p(parser, space_seen)) {
+                            pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
+                            type = PM_TOKEN_USTAR_STAR;
+                        } else if (lex_state_beg_p(parser)) {
+                            type = PM_TOKEN_USTAR_STAR;
+                        } else if (ambiguous_operator_p(parser, space_seen)) {
+                            PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
+                        }
+
+                        if (lex_state_operator_p(parser)) {
+                            lex_state_set(parser, PM_LEX_STATE_ARG);
+                        } else {
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                        }
+
+                        LEX(type);
+                    }
+
+                    if (match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_STAR_EQUAL);
+                    }
+
+                    pm_token_type_t type = PM_TOKEN_STAR;
+
+                    if (lex_state_spcarg_p(parser, space_seen)) {
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
+                        type = PM_TOKEN_USTAR;
+                    } else if (lex_state_beg_p(parser)) {
+                        type = PM_TOKEN_USTAR;
+                    } else if (ambiguous_operator_p(parser, space_seen)) {
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    LEX(type);
+                }
+
+                // ! != !~ !@ (the @ suffix is only consumed in an operator lex state)
+                case '!':
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                        if (match(parser, '@')) {
+                            LEX(PM_TOKEN_BANG); // !@ produces the same token type as a bare !
+                        }
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    if (match(parser, '=')) {
+                        LEX(PM_TOKEN_BANG_EQUAL);
+                    }
+
+                    if (match(parser, '~')) {
+                        LEX(PM_TOKEN_BANG_TILDE);
+                    }
+
+                    LEX(PM_TOKEN_BANG);
+
+                // = => =~ == === =begin
+                case '=':
+                    if (
+                        current_token_starts_line(parser) &&
+                        (parser->current.end + 5 <= parser->end) &&
+                        memcmp(parser->current.end, "begin", 5) == 0 &&
+                        (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
+                    ) {
+                        pm_token_type_t type = lex_embdoc(parser);
+                        if (type == PM_TOKEN_EOF) {
+                            LEX(type);
+                        }
+
+                        goto lex_next_token;
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    if (match(parser, '>')) {
+                        LEX(PM_TOKEN_EQUAL_GREATER);
+                    }
+
+                    if (match(parser, '~')) {
+                        LEX(PM_TOKEN_EQUAL_TILDE);
+                    }
+
+                    if (match(parser, '=')) {
+                        LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
+                    }
+
+                    LEX(PM_TOKEN_EQUAL);
+
+                // < << <<= <= <=> and heredoc openers (<<ID <<-ID <<~ID, optionally quoted)
+                case '<':
+                    if (match(parser, '<')) {
+                        if (
+                            !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
+                            !lex_state_end_p(parser) &&
+                            (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
+                        ) {
+                            const uint8_t *end = parser->current.end;
+
+                            pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
+                            pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
+
+                            if (match(parser, '-')) {
+                                indent = PM_HEREDOC_INDENT_DASH;
+                            }
+                            else if (match(parser, '~')) {
+                                indent = PM_HEREDOC_INDENT_TILDE;
+                            }
+
+                            if (match(parser, '`')) {
+                                quote = PM_HEREDOC_QUOTE_BACKTICK;
+                            }
+                            else if (match(parser, '"')) {
+                                quote = PM_HEREDOC_QUOTE_DOUBLE;
+                            }
+                            else if (match(parser, '\'')) {
+                                quote = PM_HEREDOC_QUOTE_SINGLE;
+                            }
+
+                            const uint8_t *ident_start = parser->current.end;
+                            size_t width = 0;
+
+                            if (parser->current.end >= parser->end) {
+                                parser->current.end = end;
+                            } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
+                                parser->current.end = end;
+                            } else {
+                                if (quote == PM_HEREDOC_QUOTE_NONE) {
+                                    parser->current.end += width;
+
+                                    while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
+                                        parser->current.end += width;
+                                    }
+                                } else {
+                                    // If we have quotes, then we're going to go until we find the
+                                    // end quote.
+                                    while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
+                                        if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
+                                        parser->current.end++;
+                                    }
+                                }
+
+                                size_t ident_length = (size_t) (parser->current.end - ident_start);
+                                bool ident_error = false;
+
+                                if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
+                                    pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
+                                    ident_error = true;
+                                }
+
+                                parser->explicit_encoding = NULL;
+                                lex_mode_push(parser, (pm_lex_mode_t) {
+                                    .mode = PM_LEX_HEREDOC,
+                                    .as.heredoc = {
+                                        .base = {
+                                            .ident_start = ident_start,
+                                            .ident_length = ident_length,
+                                            .quote = quote,
+                                            .indent = indent
+                                        },
+                                        .next_start = parser->current.end,
+                                        .common_whitespace = NULL,
+                                        .line_continuation = false
+                                    }
+                                });
+
+                                if (parser->heredoc_end == NULL) {
+                                    const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
+
+                                    if (body_start == NULL) {
+                                        // If there is no newline after the heredoc identifier, then
+                                        // this is not a valid heredoc declaration. In this case we
+                                        // will add an error, but we will still return a heredoc
+                                        // start.
+                                        if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
+                                        body_start = parser->end;
+                                    } else {
+                                        // Otherwise, we want to indicate that the body of the
+                                        // heredoc starts on the character after the next newline.
+                                        pm_newline_list_append(&parser->newline_list, body_start);
+                                        body_start++;
+                                    }
+
+                                    parser->next_start = body_start;
+                                } else {
+                                    parser->next_start = parser->heredoc_end;
+                                }
+
+                                LEX(PM_TOKEN_HEREDOC_START);
+                            }
+                        }
+
+                        if (match(parser, '=')) {
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                            LEX(PM_TOKEN_LESS_LESS_EQUAL);
+                        }
+
+                        if (ambiguous_operator_p(parser, space_seen)) {
+                            PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
+                        }
+
+                        if (lex_state_operator_p(parser)) {
+                            lex_state_set(parser, PM_LEX_STATE_ARG);
+                        } else {
+                            if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                        }
+
+                        LEX(PM_TOKEN_LESS_LESS);
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    if (match(parser, '=')) {
+                        if (match(parser, '>')) {
+                            LEX(PM_TOKEN_LESS_EQUAL_GREATER);
+                        }
+
+                        LEX(PM_TOKEN_LESS_EQUAL);
+                    }
+
+                    LEX(PM_TOKEN_LESS);
+
+                // > >> >>= >=
+                case '>':
+                    if (match(parser, '>')) {
+                        if (lex_state_operator_p(parser)) {
+                            lex_state_set(parser, PM_LEX_STATE_ARG);
+                        } else {
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                        }
+                        LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
+
+                // double-quoted string literal
+                case '"': {
+                    bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
+                    lex_mode_push_string(parser, true, label_allowed, '\0', '"');
+                    LEX(PM_TOKEN_STRING_BEGIN);
+                }
+
+                // ` (xstring literal, or a bare backtick token in the FNAME and DOT states)
+                case '`': {
+                    if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
+                        lex_state_set(parser, PM_LEX_STATE_ENDFN);
+                        LEX(PM_TOKEN_BACKTICK);
+                    }
+
+                    if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
+                        if (previous_command_start) {
+                            lex_state_set(parser, PM_LEX_STATE_CMDARG);
+                        } else {
+                            lex_state_set(parser, PM_LEX_STATE_ARG);
+                        }
+
+                        LEX(PM_TOKEN_BACKTICK);
+                    }
+
+                    lex_mode_push_string(parser, true, false, '\0', '`');
+                    LEX(PM_TOKEN_BACKTICK);
+                }
+
+                // single-quoted string literal
+                case '\'': {
+                    bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
+                    lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
+                    LEX(PM_TOKEN_STRING_BEGIN);
+                }
+
+                // ? character literal
+                case '?':
+                    LEX(lex_question_mark(parser));
+
+                // & && &&= &= &.
+                case '&': {
+                    if (match(parser, '&')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+
+                        if (match(parser, '=')) {
+                            LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
+                        }
+
+                        LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+                    }
+
+                    if (match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_AMPERSAND_EQUAL);
+                    }
+
+                    if (match(parser, '.')) {
+                        lex_state_set(parser, PM_LEX_STATE_DOT);
+                        LEX(PM_TOKEN_AMPERSAND_DOT);
+                    }
+
+                    pm_token_type_t type = PM_TOKEN_AMPERSAND;
+                    if (lex_state_spcarg_p(parser, space_seen)) {
+                        if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
+                            pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
+                        } else {
+                            const uint8_t delim = peek_offset(parser, 1);
+
+                            if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
+                                pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
+                            }
+                        }
+
+                        type = PM_TOKEN_UAMPERSAND;
+                    } else if (lex_state_beg_p(parser)) {
+                        type = PM_TOKEN_UAMPERSAND;
+                    } else if (ambiguous_operator_p(parser, space_seen)) {
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    LEX(type);
+                }
+
+                // | || ||= |=
+                case '|':
+                    if (match(parser, '|')) {
+                        if (match(parser, '=')) {
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                            LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
+                        }
+
+                        if (lex_state_p(parser, PM_LEX_STATE_BEG)) { // in BEG state only the first '|' is emitted here
+                            parser->current.end--; // give back the second '|' so it is lexed as its own token
+                            LEX(PM_TOKEN_PIPE);
+                        }
+
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_PIPE_PIPE);
+                    }
+
+                    if (match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_PIPE_EQUAL);
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    }
+
+                    LEX(PM_TOKEN_PIPE);
+
+                // + += +@
+                case '+': {
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+
+                        if (match(parser, '@')) {
+                            LEX(PM_TOKEN_UPLUS);
+                        }
+
+                        LEX(PM_TOKEN_PLUS);
+                    }
+
+                    if (match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_PLUS_EQUAL);
+                    }
+
+                    if (
+                        lex_state_beg_p(parser) ||
+                        (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
+                    ) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+
+                        if (pm_char_is_decimal_digit(peek(parser))) {
+                            parser->current.end++;
+                            pm_token_type_t type = lex_numeric(parser);
+                            lex_state_set(parser, PM_LEX_STATE_END);
+                            LEX(type);
+                        }
+
+                        LEX(PM_TOKEN_UPLUS);
+                    }
+
+                    if (ambiguous_operator_p(parser, space_seen)) {
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                    LEX(PM_TOKEN_PLUS);
+                }
+
+                // - -= -> -@
+                case '-': {
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+
+                        if (match(parser, '@')) {
+                            LEX(PM_TOKEN_UMINUS);
+                        }
+
+                        LEX(PM_TOKEN_MINUS);
+                    }
+
+                    if (match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_MINUS_EQUAL);
+                    }
+
+                    if (match(parser, '>')) {
+                        lex_state_set(parser, PM_LEX_STATE_ENDFN);
+                        LEX(PM_TOKEN_MINUS_GREATER);
+                    }
+
+                    bool spcarg = lex_state_spcarg_p(parser, space_seen);
+                    bool is_beg = lex_state_beg_p(parser);
+                    if (!is_beg && spcarg) {
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
+                    }
+
+                    if (is_beg || spcarg) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
+                    }
+
+                    if (ambiguous_operator_p(parser, space_seen)) {
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                    LEX(PM_TOKEN_MINUS);
+                }
+
+                // . .. ...
+                case '.': {
+                    bool beg_p = lex_state_beg_p(parser);
+
+                    if (match(parser, '.')) {
+                        if (match(parser, '.')) {
+                            // If we're inside def parameters but _not_ inside a default
+                            // parameter value (where ... could be a range)
+                            if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
+                                if (lex_state_p(parser, PM_LEX_STATE_END)) {
+                                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                                } else {
+                                    lex_state_set(parser, PM_LEX_STATE_ENDARG);
+                                }
+                                LEX(PM_TOKEN_UDOT_DOT_DOT);
+                            }
+
+                            if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
+                                pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
+                            }
+
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                            LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
+                        }
+
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_DOT);
+                    LEX(PM_TOKEN_DOT);
+                }
+
+                // numeric literal (the token type is chosen by lex_numeric)
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9': {
+                    pm_token_type_t type = lex_numeric(parser);
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    LEX(type);
+                }
+
+                // : :: symbol
+                case ':':
+                    if (match(parser, ':')) {
+                        if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
+                            lex_state_set(parser, PM_LEX_STATE_BEG);
+                            LEX(PM_TOKEN_UCOLON_COLON);
+                        }
+
+                        lex_state_set(parser, PM_LEX_STATE_DOT);
+                        LEX(PM_TOKEN_COLON_COLON);
+                    }
+
+                    if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_COLON);
+                    }
+
+                    if (peek(parser) == '"' || peek(parser) == '\'') {
+                        lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
+                        parser->current.end++;
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_FNAME);
+                    LEX(PM_TOKEN_SYMBOL_BEGIN);
+
+                // / /=
+                case '/':
+                    if (lex_state_beg_p(parser)) {
+                        lex_mode_push_regexp(parser, '\0', '/');
+                        LEX(PM_TOKEN_REGEXP_BEGIN);
+                    }
+
+                    if (match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_SLASH_EQUAL);
+                    }
+
+                    if (lex_state_spcarg_p(parser, space_seen)) {
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
+                        lex_mode_push_regexp(parser, '\0', '/');
+                        LEX(PM_TOKEN_REGEXP_BEGIN);
+                    }
+
+                    if (ambiguous_operator_p(parser, space_seen)) {
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
+                    }
+
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    LEX(PM_TOKEN_SLASH);
+
+                // ^ ^=
+                case '^':
+                    if (lex_state_operator_p(parser)) {
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+                    LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
+
+                // ~ ~@ (both forms produce PM_TOKEN_TILDE)
+                case '~':
+                    if (lex_state_operator_p(parser)) {
+                        (void) match(parser, '@'); // result deliberately ignored: the '@' is optional here
+                        lex_state_set(parser, PM_LEX_STATE_ARG);
+                    } else {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                    }
+
+                    LEX(PM_TOKEN_TILDE);
+
+                // % %= %i %I %q %Q %w %W
+                case '%': {
+                    // If there is no subsequent character then we have an
+                    // invalid token. We're going to say it's the percent
+                    // operator because we don't want to move into the string
+                    // lex mode unnecessarily.
+                    if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
+                        pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
+                        LEX(PM_TOKEN_PERCENT);
+                    }
+
+                    if (!lex_state_beg_p(parser) && match(parser, '=')) {
+                        lex_state_set(parser, PM_LEX_STATE_BEG);
+                        LEX(PM_TOKEN_PERCENT_EQUAL);
+                    } else if (
+                        lex_state_beg_p(parser) ||
+                        (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
+                        lex_state_spcarg_p(parser, space_seen)
+                    ) {
+                        if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
+                            if (*parser->current.end >= 0x80) {
+                                pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
+                            }
+
+                            const uint8_t delimiter = pm_lex_percent_delimiter(parser);
+                            lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
+                            LEX(PM_TOKEN_STRING_BEGIN);
+                        }
+
+                        // Delimiters for %-literals cannot be alphanumeric. We
+                        // validate that here.
+                        uint8_t delimiter = peek_offset(parser, 1);
+                        if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
+                            pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
+                            goto lex_next_token;
+                        }
+
+                        switch (peek(parser)) {
+                            case 'i': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
+                                } else {
+                                    lex_mode_push_list_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_PERCENT_LOWER_I);
+                            }
+                            case 'I': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
+                                } else {
+                                    lex_mode_push_list_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_PERCENT_UPPER_I);
+                            }
+                            case 'r': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    const uint8_t delimiter = pm_lex_percent_delimiter(parser);
+                                    lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
+                                } else {
+                                    lex_mode_push_regexp(parser, '\0', '\0');
+                                }
+
+                                LEX(PM_TOKEN_REGEXP_BEGIN);
+                            }
+                            case 'q': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    const uint8_t delimiter = pm_lex_percent_delimiter(parser);
+                                    lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
+                                } else {
+                                    lex_mode_push_string_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_STRING_BEGIN);
+                            }
+                            case 'Q': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    const uint8_t delimiter = pm_lex_percent_delimiter(parser);
+                                    lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
+                                } else {
+                                    lex_mode_push_string_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_STRING_BEGIN);
+                            }
+                            case 's': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    const uint8_t delimiter = pm_lex_percent_delimiter(parser);
+                                    lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
+                                    lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
+                                } else {
+                                    lex_mode_push_string_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_SYMBOL_BEGIN);
+                            }
+                            case 'w': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
+                                } else {
+                                    lex_mode_push_list_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_PERCENT_LOWER_W);
+                            }
+                            case 'W': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
+                                } else {
+                                    lex_mode_push_list_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_PERCENT_UPPER_W);
+                            }
+                            case 'x': {
+                                parser->current.end++;
+
+                                if (parser->current.end < parser->end) {
+                                    const uint8_t delimiter = pm_lex_percent_delimiter(parser);
+                                    lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
+                                } else {
+                                    lex_mode_push_string_eof(parser);
+                                }
+
+                                LEX(PM_TOKEN_PERCENT_LOWER_X);
+                            }
+                            default:
+                                // If we get to this point, then we have a % that is completely
+                                // unparsable. In this case we'll just drop it from the parser
+                                // and skip past it and hope that the next token is something
+                                // that we can parse.
+                                pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
+                                goto lex_next_token;
+                        }
+                    }
+
+                    if (ambiguous_operator_p(parser, space_seen)) {
+                        PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
+                    }
+
+                    lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
+                    LEX(PM_TOKEN_PERCENT);
+                }
+
+                // global variable
+                case '$': {
+                    pm_token_type_t type = lex_global_variable(parser);
+
+                    // If we're lexing an embedded variable, then we need to pop back into
+                    // the parent lex context.
+                    if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
+                        lex_mode_pop(parser);
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    LEX(type);
+                }
+
+                // instance variable, class variable
+                case '@':
+                    lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
+                    LEX(lex_at_variable(parser));
+
+                default: {
+                    if (*parser->current.start != '_') {
+                        size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
+
+                        // If this isn't the beginning of an identifier, then
+                        // it's an invalid token as we've exhausted all of the
+                        // other options. We'll skip past it and return the next
+                        // token after adding an appropriate error message.
+                        if (!width) {
+                            if (*parser->current.start >= 0x80) {
+                                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
+                            } else if (*parser->current.start == '\\') {
+                                switch (peek_at(parser, parser->current.start + 1)) {
+                                    case ' ':
+                                        parser->current.end++;
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
+                                        break;
+                                    case '\f':
+                                        parser->current.end++;
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
+                                        break;
+                                    case '\t':
+                                        parser->current.end++;
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
+                                        break;
+                                    case '\v':
+                                        parser->current.end++;
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
+                                        break;
+                                    case '\r':
+                                        if (peek_at(parser, parser->current.start + 2) != '\n') {
+                                            parser->current.end++;
+                                            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
+                                            break;
+                                        }
+                                        PRISM_FALLTHROUGH
+                                    default:
+                                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
+                                        break;
+                                }
+                            } else if (char_is_ascii_printable(*parser->current.start)) {
+                                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
+                            } else {
+                                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
+                            }
+
+                            goto lex_next_token;
+                        }
+
+                        parser->current.end = parser->current.start + width;
+                    }
+
+                    pm_token_type_t type = lex_identifier(parser, previous_command_start);
+
+                    // If we've hit a __END__ and it was at the start of the
+                    // line or the start of the file and it is followed by
+                    // either a \n or a \r\n, then this is the last token of the
+                    // file.
+                    if (
+                        ((parser->current.end - parser->current.start) == 7) &&
+                        current_token_starts_line(parser) &&
+                        (memcmp(parser->current.start, "__END__", 7) == 0) &&
+                        (parser->current.end == parser->end || match_eol(parser))
+                    ) {
+                        // Since we know we're about to add an __END__ comment,
+                        // we know we need to add all of the newlines to get the
+                        // correct column information for it.
+                        const uint8_t *cursor = parser->current.end;
+                        while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
+                            pm_newline_list_append(&parser->newline_list, cursor++);
+                        }
+
+                        parser->current.end = parser->end;
+                        parser->current.type = PM_TOKEN___END__;
+                        parser_lex_callback(parser);
+
+                        parser->data_loc.start = parser->current.start;
+                        parser->data_loc.end = parser->current.end;
+
+                        LEX(PM_TOKEN_EOF);
+                    }
+
+                    pm_lex_state_t last_state = parser->lex_state;
+
+                    if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
+                        if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
+                            if (previous_command_start) {
+                                lex_state_set(parser, PM_LEX_STATE_CMDARG);
+                            } else {
+                                lex_state_set(parser, PM_LEX_STATE_ARG);
+                            }
+                        } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
+                            lex_state_set(parser, PM_LEX_STATE_ENDFN);
+                        } else {
+                            lex_state_set(parser, PM_LEX_STATE_END);
+                        }
+                    }
+
+                    if (
+                        !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
+                        (type == PM_TOKEN_IDENTIFIER) &&
+                        ((pm_parser_local_depth(parser, &parser->current) != -1) ||
+                         pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
+                    ) {
+                        lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
+                    }
+
+                    LEX(type);
+                }
+            }
+        }
+        case PM_LEX_LIST: {
+            if (parser->next_start != NULL) {
+                parser->current.end = parser->next_start;
+                parser->next_start = NULL;
+            }
+
+            // First we'll set the beginning of the token.
+            parser->current.start = parser->current.end;
+
+            // If there's any whitespace at the start of the list, then we're
+            // going to trim it off the beginning and create a new token.
+            size_t whitespace;
+
+            if (parser->heredoc_end) {
+                whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
+                if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
+                    whitespace += 1;
+                }
+            } else {
+                whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
+            }
+
+            if (whitespace > 0) {
+                parser->current.end += whitespace;
+                if (peek_offset(parser, -1) == '\n') {
+                    // mutates next_start
+                    parser_flush_heredoc_end(parser);
+                }
+                LEX(PM_TOKEN_WORDS_SEP);
+            }
+
+            // We'll check if we're at the end of the file. If we are, then we
+            // need to return the EOF token.
+            if (parser->current.end >= parser->end) {
+                LEX(PM_TOKEN_EOF);
+            }
+
+            // Here we'll get a list of the places where strpbrk should break,
+            // and then find the first one.
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+            const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
+            const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+
+            // If we haven't found an escape yet, then this buffer will be
+            // unallocated since we can refer directly to the source string.
+            pm_token_buffer_t token_buffer = { 0 };
+
+            while (breakpoint != NULL) {
+                // If we hit whitespace, then we must have received content by
+                // now, so we can return an element of the list.
+                if (pm_char_is_whitespace(*breakpoint)) {
+                    parser->current.end = breakpoint;
+                    pm_token_buffer_flush(parser, &token_buffer);
+                    LEX(PM_TOKEN_STRING_CONTENT);
+                }
+
+                // If we hit the terminator, we need to check which token to
+                // return.
+                if (*breakpoint == lex_mode->as.list.terminator) {
+                    // If this terminator doesn't actually close the list, then
+                    // we need to continue on past it.
+                    if (lex_mode->as.list.nesting > 0) {
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        lex_mode->as.list.nesting--;
+                        continue;
+                    }
+
+                    // If we've hit the terminator and we've already skipped
+                    // past content, then we can return a list node.
+                    if (breakpoint > parser->current.start) {
+                        parser->current.end = breakpoint;
+                        pm_token_buffer_flush(parser, &token_buffer);
+                        LEX(PM_TOKEN_STRING_CONTENT);
+                    }
+
+                    // Otherwise, switch back to the default state and return
+                    // the end of the list.
+                    parser->current.end = breakpoint + 1;
+                    lex_mode_pop(parser);
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    LEX(PM_TOKEN_STRING_END);
+                }
+
+                // If we hit a null byte, skip directly past it.
+                if (*breakpoint == '\0') {
+                    breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
+                    continue;
+                }
+
+                // If we hit escapes, then we need to treat the next token
+                // literally. In this case we'll skip past the next character
+                // and find the next breakpoint.
+                if (*breakpoint == '\\') {
+                    parser->current.end = breakpoint + 1;
+
+                    // If we've hit the end of the file, then break out of the
+                    // loop by setting the breakpoint to NULL.
+                    if (parser->current.end == parser->end) {
+                        breakpoint = NULL;
+                        continue;
+                    }
+
+                    pm_token_buffer_escape(parser, &token_buffer);
+                    uint8_t peeked = peek(parser);
+
+                    switch (peeked) {
+                        case ' ':
+                        case '\f':
+                        case '\t':
+                        case '\v':
+                        case '\\':
+                            pm_token_buffer_push_byte(&token_buffer, peeked);
+                            parser->current.end++;
+                            break;
+                        case '\r':
+                            parser->current.end++;
+                            if (peek(parser) != '\n') {
+                                pm_token_buffer_push_byte(&token_buffer, '\r');
+                                break;
+                            }
+                        PRISM_FALLTHROUGH
+                        case '\n':
+                            pm_token_buffer_push_byte(&token_buffer, '\n');
+
+                            if (parser->heredoc_end) {
+                                // ... if we are on the same line as a heredoc,
+                                // flush the heredoc and continue parsing after
+                                // heredoc_end.
+                                parser_flush_heredoc_end(parser);
+                                pm_token_buffer_copy(parser, &token_buffer);
+                                LEX(PM_TOKEN_STRING_CONTENT);
+                            } else {
+                                // ... else track the newline.
+                                pm_newline_list_append(&parser->newline_list, parser->current.end);
+                            }
+
+                            parser->current.end++;
+                            break;
+                        default:
+                            if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
+                                pm_token_buffer_push_byte(&token_buffer, peeked);
+                                parser->current.end++;
+                            } else if (lex_mode->as.list.interpolation) {
+                                escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
+                            } else {
+                                pm_token_buffer_push_byte(&token_buffer, '\\');
+                                pm_token_buffer_push_escaped(&token_buffer, parser);
+                            }
+
+                            break;
+                    }
+
+                    token_buffer.cursor = parser->current.end;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                    continue;
+                }
+
+                // If we hit a #, then we will attempt to lex interpolation.
+                if (*breakpoint == '#') {
+                    pm_token_type_t type = lex_interpolation(parser, breakpoint);
+
+                    if (type == PM_TOKEN_NOT_PROVIDED) {
+                        // If we haven't returned at this point then we had something
+                        // that looked like an interpolated class or instance variable
+                        // like "#@" but wasn't actually. In this case we'll just skip
+                        // to the next breakpoint.
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        continue;
+                    }
+
+                    if (type == PM_TOKEN_STRING_CONTENT) {
+                        pm_token_buffer_flush(parser, &token_buffer);
+                    }
+
+                    LEX(type);
+                }
+
+                // If we've hit the incrementor, then we need to skip past it
+                // and find the next breakpoint.
+                assert(*breakpoint == lex_mode->as.list.incrementor);
+                parser->current.end = breakpoint + 1;
+                breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                lex_mode->as.list.nesting++;
+                continue;
+            }
+
+            if (parser->current.end > parser->current.start) {
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
+
+            // If we were unable to find a breakpoint, then this token hits the
+            // end of the file.
+            parser->current.end = parser->end;
+            pm_token_buffer_flush(parser, &token_buffer);
+            LEX(PM_TOKEN_STRING_CONTENT);
+        }
+        case PM_LEX_REGEXP: {
+            // First, we'll set the start of this token to be the current end.
+            if (parser->next_start == NULL) {
+                parser->current.start = parser->current.end;
+            } else {
+                parser->current.start = parser->next_start;
+                parser->current.end = parser->next_start;
+                parser->next_start = NULL;
+            }
+
+            // We'll check if we're at the end of the file. If we are, then we
+            // need to return the EOF token.
+            if (parser->current.end >= parser->end) {
+                LEX(PM_TOKEN_EOF);
+            }
+
+            // Get a reference to the current mode.
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+
+            // These are the places where we need to split up the content of the
+            // regular expression. We'll use strpbrk to find the first of these
+            // characters.
+            const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
+            const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+            pm_regexp_token_buffer_t token_buffer = { 0 };
+
+            while (breakpoint != NULL) {
+                uint8_t term = lex_mode->as.regexp.terminator;
+                bool is_terminator = (*breakpoint == term);
+
+                // If the terminator is newline, we need to consider \r\n _also_ a newline
+                // For example: `%\nfoo\r\n`
+                // The string should be "foo", not "foo\r"
+                if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+                    if (term == '\n') {
+                        is_terminator = true;
+                    }
+
+                    // If the terminator is a CR, but we see a CRLF, we need to
+                    // treat the CRLF as a newline, meaning this is _not_ the
+                    // terminator
+                    if (term == '\r') {
+                        is_terminator = false;
+                    }
+                }
+
+                // If we hit the terminator, we need to determine what kind of
+                // token to return.
+                if (is_terminator) {
+                    if (lex_mode->as.regexp.nesting > 0) {
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                        lex_mode->as.regexp.nesting--;
+                        continue;
+                    }
+
+                    // Here we've hit the terminator. If we have already consumed
+                    // content then we need to return that content as string content
+                    // first.
+                    if (breakpoint > parser->current.start) {
+                        parser->current.end = breakpoint;
+                        pm_regexp_token_buffer_flush(parser, &token_buffer);
+                        LEX(PM_TOKEN_STRING_CONTENT);
+                    }
+
+                    // Check here if we need to track the newline.
+                    size_t eol_length = match_eol_at(parser, breakpoint);
+                    if (eol_length) {
+                        parser->current.end = breakpoint + eol_length;
+
+                        // Track the newline if we're not in a heredoc that
+                        // would have already added the newline to the
+                        // list.
+                        if (parser->heredoc_end == NULL) {
+                            pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+                        }
+                    } else {
+                        parser->current.end = breakpoint + 1;
+                    }
+
+                    // Since we've hit the terminator of the regular expression,
+                    // we now need to parse the options.
+                    parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
+
+                    lex_mode_pop(parser);
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    LEX(PM_TOKEN_REGEXP_END);
+                }
+
+                // If we've hit the incrementor, then we need to skip past it
+                // and find the next breakpoint.
+                if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
+                    parser->current.end = breakpoint + 1;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                    lex_mode->as.regexp.nesting++;
+                    continue;
+                }
+
+                switch (*breakpoint) {
+                    case '\0':
+                        // If we hit a null byte, skip directly past it.
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                        break;
+                    case '\r':
+                        if (peek_at(parser, breakpoint + 1) != '\n') {
+                            parser->current.end = breakpoint + 1;
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                            break;
+                        }
+
+                        breakpoint++;
+                        parser->current.end = breakpoint;
+                        pm_regexp_token_buffer_escape(parser, &token_buffer);
+                        token_buffer.base.cursor = breakpoint;
+
+                        PRISM_FALLTHROUGH
+                    case '\n':
+                        // If we've hit a newline, then we need to track that in
+                        // the list of newlines.
+                        if (parser->heredoc_end == NULL) {
+                            pm_newline_list_append(&parser->newline_list, breakpoint);
+                            parser->current.end = breakpoint + 1;
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                            break;
+                        }
+
+                        parser->current.end = breakpoint + 1;
+                        parser_flush_heredoc_end(parser);
+                        pm_regexp_token_buffer_flush(parser, &token_buffer);
+                        LEX(PM_TOKEN_STRING_CONTENT);
+                    case '\\': {
+                        // If we hit escapes, then we need to treat the next
+                        // token literally. In this case we'll skip past the
+                        // next character and find the next breakpoint.
+                        parser->current.end = breakpoint + 1;
+
+                        // If we've hit the end of the file, then break out of
+                        // the loop by setting the breakpoint to NULL.
+                        if (parser->current.end == parser->end) {
+                            breakpoint = NULL;
+                            break;
+                        }
+
+                        pm_regexp_token_buffer_escape(parser, &token_buffer);
+                        uint8_t peeked = peek(parser);
+
+                        switch (peeked) {
+                            case '\r':
+                                parser->current.end++;
+                                if (peek(parser) != '\n') {
+                                    if (lex_mode->as.regexp.terminator != '\r') {
+                                        pm_token_buffer_push_byte(&token_buffer.base, '\\');
+                                    }
+                                    pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
+                                    pm_token_buffer_push_byte(&token_buffer.base, '\r');
+                                    break;
+                                }
+                            PRISM_FALLTHROUGH
+                            case '\n':
+                                if (parser->heredoc_end) {
+                                    // ... if we are on the same line as a heredoc,
+                                    // flush the heredoc and continue parsing after
+                                    // heredoc_end.
+                                    parser_flush_heredoc_end(parser);
+                                    pm_regexp_token_buffer_copy(parser, &token_buffer);
+                                    LEX(PM_TOKEN_STRING_CONTENT);
+                                } else {
+                                    // ... else track the newline.
+                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                }
+
+                                parser->current.end++;
+                                break;
+                            case 'c':
+                            case 'C':
+                            case 'M':
+                            case 'u':
+                            case 'x':
+                                escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
+                                break;
+                            default:
+                                if (lex_mode->as.regexp.terminator == peeked) {
+                                    // Some characters when they are used as the
+                                    // terminator also receive an escape. They are
+                                    // enumerated here.
+                                    switch (peeked) {
+                                        case '$': case ')': case '*': case '+':
+                                        case '.': case '>': case '?': case ']':
+                                        case '^': case '|': case '}':
+                                            pm_token_buffer_push_byte(&token_buffer.base, '\\');
+                                            break;
+                                        default:
+                                            break;
+                                    }
+
+                                    pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
+                                    pm_token_buffer_push_byte(&token_buffer.base, peeked);
+                                    parser->current.end++;
+                                    break;
+                                }
+
+                                if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
+                                pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
+                                break;
+                        }
+
+                        token_buffer.base.cursor = parser->current.end;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                        break;
+                    }
+                    case '#': {
+                        // If we hit a #, then we will attempt to lex
+                        // interpolation.
+                        pm_token_type_t type = lex_interpolation(parser, breakpoint);
+
+                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                            // If we haven't returned at this point then we had
+                            // something that looked like an interpolated class or
+                            // instance variable like "#@" but wasn't actually. In
+                            // this case we'll just skip to the next breakpoint.
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+                            break;
+                        }
+
+                        if (type == PM_TOKEN_STRING_CONTENT) {
+                            pm_regexp_token_buffer_flush(parser, &token_buffer);
+                        }
+
+                        LEX(type);
+                    }
+                    default:
+                        assert(false && "unreachable");
+                        break;
+                }
+            }
+
+            if (parser->current.end > parser->current.start) {
+                pm_regexp_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
+
+            // If we were unable to find a breakpoint, then this token hits the
+            // end of the file.
+            parser->current.end = parser->end;
+            pm_regexp_token_buffer_flush(parser, &token_buffer);
+            LEX(PM_TOKEN_STRING_CONTENT);
+        }
+        case PM_LEX_STRING: {
+            // First, we'll set the start of this token to be the current end.
+            if (parser->next_start == NULL) {
+                parser->current.start = parser->current.end;
+            } else {
+                parser->current.start = parser->next_start;
+                parser->current.end = parser->next_start;
+                parser->next_start = NULL;
+            }
+
+            // We'll check if we're at the end of the file. If we are, then we need to
+            // return the EOF token.
+            if (parser->current.end >= parser->end) {
+                LEX(PM_TOKEN_EOF);
+            }
+
+            // These are the places where we need to split up the content of the
+            // string. We'll use strpbrk to find the first of these characters.
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+            const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
+            const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+
+            // If we haven't found an escape yet, then this buffer will be
+            // unallocated since we can refer directly to the source string.
+            pm_token_buffer_t token_buffer = { 0 };
+
+            while (breakpoint != NULL) {
+                // If we hit the incrementor, then we'll increment the nesting and
+                // continue lexing.
+                if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
+                    lex_mode->as.string.nesting++;
+                    parser->current.end = breakpoint + 1;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                    continue;
+                }
+
+                uint8_t term = lex_mode->as.string.terminator;
+                bool is_terminator = (*breakpoint == term);
+
+                // If the terminator is newline, we need to consider \r\n _also_ a newline
+                // For example: `%r\nfoo\r\n`
+                // The string should be /foo/, not /foo\r/
+                if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
+                    if (term == '\n') {
+                        is_terminator = true;
+                    }
+
+                    // If the terminator is a CR, but we see a CRLF, we need to
+                    // treat the CRLF as a newline, meaning this is _not_ the
+                    // terminator
+                    if (term == '\r') {
+                        is_terminator = false;
+                    }
+                }
+
+                // Note that we have to check the terminator here first because we could
+                // potentially be parsing a % string that has a # character as the
+                // terminator.
+                if (is_terminator) {
+                    // If this terminator doesn't actually close the string, then we need
+                    // to continue on past it.
+                    if (lex_mode->as.string.nesting > 0) {
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        lex_mode->as.string.nesting--;
+                        continue;
+                    }
+
+                    // Here we've hit the terminator. If we have already consumed content
+                    // then we need to return that content as string content first.
+                    if (breakpoint > parser->current.start) {
+                        parser->current.end = breakpoint;
+                        pm_token_buffer_flush(parser, &token_buffer);
+                        LEX(PM_TOKEN_STRING_CONTENT);
+                    }
+
+                    // Otherwise we need to switch back to the parent lex mode and
+                    // return the end of the string.
+                    size_t eol_length = match_eol_at(parser, breakpoint);
+                    if (eol_length) {
+                        parser->current.end = breakpoint + eol_length;
+
+                        // Track the newline if we're not in a heredoc that
+                        // would already have added the newline to the
+                        // list.
+                        if (parser->heredoc_end == NULL) {
+                            pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+                        }
+                    } else {
+                        parser->current.end = breakpoint + 1;
+                    }
+
+                    if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
+                        parser->current.end++;
+                        lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
+                        lex_mode_pop(parser);
+                        LEX(PM_TOKEN_LABEL_END);
+                    }
+
+                    // When the delimiter itself is a newline, we won't
+                    // get a chance to flush heredocs in the usual places since
+                    // the newline is already consumed.
+                    if (term == '\n' && parser->heredoc_end) {
+                        parser_flush_heredoc_end(parser);
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_END);
+                    lex_mode_pop(parser);
+                    LEX(PM_TOKEN_STRING_END);
+                }
+
+                switch (*breakpoint) {
+                    case '\0':
+                        // Skip directly past the null character.
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        break;
+                    case '\r':
+                        if (peek_at(parser, breakpoint + 1) != '\n') {
+                            parser->current.end = breakpoint + 1;
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                            break;
+                        }
+
+                        // If we hit a \r\n sequence, then we need to treat it
+                        // as a newline.
+                        breakpoint++;
+                        parser->current.end = breakpoint;
+                        pm_token_buffer_escape(parser, &token_buffer);
+                        token_buffer.cursor = breakpoint;
+
+                        PRISM_FALLTHROUGH
+                    case '\n':
+                        // When we hit a newline, we need to flush any potential
+                        // heredocs. Note that this has to happen after we check
+                        // for the terminator in case the terminator is a
+                        // newline character.
+                        if (parser->heredoc_end == NULL) {
+                            pm_newline_list_append(&parser->newline_list, breakpoint);
+                            parser->current.end = breakpoint + 1;
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                            break;
+                        }
+
+                        parser->current.end = breakpoint + 1;
+                        parser_flush_heredoc_end(parser);
+                        pm_token_buffer_flush(parser, &token_buffer);
+                        LEX(PM_TOKEN_STRING_CONTENT);
+                    case '\\': {
+                        // Here we hit escapes.
+                        parser->current.end = breakpoint + 1;
+
+                        // If we've hit the end of the file, then break out of
+                        // the loop by setting the breakpoint to NULL.
+                        if (parser->current.end == parser->end) {
+                            breakpoint = NULL;
+                            continue;
+                        }
+
+                        pm_token_buffer_escape(parser, &token_buffer);
+                        uint8_t peeked = peek(parser);
+
+                        switch (peeked) {
+                            case '\\':
+                                pm_token_buffer_push_byte(&token_buffer, '\\');
+                                parser->current.end++;
+                                break;
+                            case '\r':
+                                parser->current.end++;
+                                if (peek(parser) != '\n') {
+                                    if (!lex_mode->as.string.interpolation) {
+                                        pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    }
+                                    pm_token_buffer_push_byte(&token_buffer, '\r');
+                                    break;
+                                }
+                            PRISM_FALLTHROUGH
+                            case '\n':
+                                if (!lex_mode->as.string.interpolation) {
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_byte(&token_buffer, '\n');
+                                }
+
+                                if (parser->heredoc_end) {
+                                    // ... if we are on the same line as a heredoc,
+                                    // flush the heredoc and continue parsing after
+                                    // heredoc_end.
+                                    parser_flush_heredoc_end(parser);
+                                    pm_token_buffer_copy(parser, &token_buffer);
+                                    LEX(PM_TOKEN_STRING_CONTENT);
+                                } else {
+                                    // ... else track the newline.
+                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                }
+
+                                parser->current.end++;
+                                break;
+                            default:
+                                if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
+                                    pm_token_buffer_push_byte(&token_buffer, peeked);
+                                    parser->current.end++;
+                                } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
+                                    pm_token_buffer_push_byte(&token_buffer, peeked);
+                                    parser->current.end++;
+                                } else if (lex_mode->as.string.interpolation) {
+                                    escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
+                                } else {
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_escaped(&token_buffer, parser);
+                                }
+
+                                break;
+                        }
+
+                        token_buffer.cursor = parser->current.end;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        break;
+                    }
+                    case '#': {
+                        pm_token_type_t type = lex_interpolation(parser, breakpoint);
+
+                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                            // If we haven't returned at this point then we had something that
+                            // looked like an interpolated class or instance variable like "#@"
+                            // but wasn't actually. In this case we'll just skip to the next
+                            // breakpoint.
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                            break;
+                        }
+
+                        if (type == PM_TOKEN_STRING_CONTENT) {
+                            pm_token_buffer_flush(parser, &token_buffer);
+                        }
+
+                        LEX(type);
+                    }
+                    default:
+                        assert(false && "unreachable");
+                }
+            }
+
+            if (parser->current.end > parser->current.start) {
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
+
+            // If we've hit the end of the string, then this is an unterminated
+            // string. In that case we'll return a string content token.
+            parser->current.end = parser->end;
+            pm_token_buffer_flush(parser, &token_buffer);
+            LEX(PM_TOKEN_STRING_CONTENT);
+        }
+        case PM_LEX_HEREDOC: {
+            // First, we'll set the start of this token.
+            if (parser->next_start == NULL) {
+                parser->current.start = parser->current.end;
+            } else {
+                parser->current.start = parser->next_start;
+                parser->current.end = parser->next_start;
+                parser->heredoc_end = NULL;
+                parser->next_start = NULL;
+            }
+
+            // Now let's grab the information about the identifier off of the
+            // current lex mode.
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+            pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
+
+            bool line_continuation = lex_mode->as.heredoc.line_continuation;
+            lex_mode->as.heredoc.line_continuation = false;
+
+            // We'll check if we're at the end of the file. If we are, then we
+            // will add an error (because we weren't able to find the
+            // terminator) but still continue parsing so that content after the
+            // declaration of the heredoc can be parsed.
+            if (parser->current.end >= parser->end) {
+                pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
+                parser->next_start = lex_mode->as.heredoc.next_start;
+                parser->heredoc_end = parser->current.end;
+                lex_state_set(parser, PM_LEX_STATE_END);
+                lex_mode_pop(parser);
+                LEX(PM_TOKEN_HEREDOC_END);
+            }
+
+            const uint8_t *ident_start = heredoc_lex_mode->ident_start;
+            size_t ident_length = heredoc_lex_mode->ident_length;
+
+            // If we are immediately following a newline and we have hit the
+            // terminator, then we need to return the ending of the heredoc.
+            if (current_token_starts_line(parser)) {
+                const uint8_t *start = parser->current.start;
+
+                if (!line_continuation && (start + ident_length <= parser->end)) {
+                    const uint8_t *newline = next_newline(start, parser->end - start);
+                    const uint8_t *ident_end = newline;
+                    const uint8_t *terminator_end = newline;
+
+                    if (newline == NULL) {
+                        terminator_end = parser->end;
+                        ident_end = parser->end;
+                    } else {
+                        terminator_end++;
+                        if (newline[-1] == '\r') {
+                            ident_end--; // Remove \r
+                        }
+                    }
+
+                    const uint8_t *terminator_start = ident_end - ident_length;
+                    const uint8_t *cursor = start;
+
+                    if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
+                        while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
+                            cursor++;
+                        }
+                    }
+
+                    if (
+                        (cursor == terminator_start) &&
+                        (memcmp(terminator_start, ident_start, ident_length) == 0)
+                    ) {
+                        if (newline != NULL) {
+                            pm_newline_list_append(&parser->newline_list, newline);
+                        }
+
+                        parser->current.end = terminator_end;
+                        if (*lex_mode->as.heredoc.next_start == '\\') {
+                            parser->next_start = NULL;
+                        } else {
+                            parser->next_start = lex_mode->as.heredoc.next_start;
+                            parser->heredoc_end = parser->current.end;
+                        }
+
+                        lex_state_set(parser, PM_LEX_STATE_END);
+                        lex_mode_pop(parser);
+                        LEX(PM_TOKEN_HEREDOC_END);
+                    }
+                }
+
+                size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
+                if (
+                    heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
+                    lex_mode->as.heredoc.common_whitespace != NULL &&
+                    (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
+                    peek_at(parser, start) != '\n'
+                ) {
+                    *lex_mode->as.heredoc.common_whitespace = whitespace;
+                }
+            }
+
+            // Otherwise we'll be parsing string content. These are the places
+            // where we need to split up the content of the heredoc. We'll use
+            // strpbrk to find the first of these characters.
+            uint8_t breakpoints[] = "\r\n\\#";
+
+            pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
+            if (quote == PM_HEREDOC_QUOTE_SINGLE) {
+                breakpoints[3] = '\0';
+            }
+
+            const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+            pm_token_buffer_t token_buffer = { 0 };
+            bool was_line_continuation = false;
+
+            while (breakpoint != NULL) {
+                switch (*breakpoint) {
+                    case '\0':
+                        // Skip directly past the null character.
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        break;
+                    case '\r':
+                        parser->current.end = breakpoint + 1;
+
+                        if (peek_at(parser, breakpoint + 1) != '\n') {
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                            break;
+                        }
+
+                        // If we hit a \r\n sequence, then we want to replace it
+                        // with a single \n character in the final string.
+                        breakpoint++;
+                        pm_token_buffer_escape(parser, &token_buffer);
+                        token_buffer.cursor = breakpoint;
+
+                        PRISM_FALLTHROUGH
+                    case '\n': {
+                        if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
+                            parser_flush_heredoc_end(parser);
+                            parser->current.end = breakpoint + 1;
+                            pm_token_buffer_flush(parser, &token_buffer);
+                            LEX(PM_TOKEN_STRING_CONTENT);
+                        }
+
+                        pm_newline_list_append(&parser->newline_list, breakpoint);
+
+                        // If we have a - or ~ heredoc, then we can match after
+                        // some leading whitespace.
+                        const uint8_t *start = breakpoint + 1;
+
+                        if (!was_line_continuation && (start + ident_length <= parser->end)) {
+                            // We want to match the terminator starting from the end of the line in case
+                            // there is whitespace in the ident such as <<-'   DOC' or <<~'   DOC'.
+                            const uint8_t *newline = next_newline(start, parser->end - start);
+
+                            if (newline == NULL) {
+                                newline = parser->end;
+                            } else if (newline[-1] == '\r') {
+                                newline--; // Remove \r
+                            }
+
+                            // Start of a possible terminator.
+                            const uint8_t *terminator_start = newline - ident_length;
+
+                            // Cursor to check for the leading whitespace. We skip the
+                            // leading whitespace if we have a - or ~ heredoc.
+                            const uint8_t *cursor = start;
+
+                            if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
+                                while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
+                                    cursor++;
+                                }
+                            }
+
+                            if (
+                                cursor == terminator_start &&
+                                (memcmp(terminator_start, ident_start, ident_length) == 0)
+                            ) {
+                                parser->current.end = breakpoint + 1;
+                                pm_token_buffer_flush(parser, &token_buffer);
+                                LEX(PM_TOKEN_STRING_CONTENT);
+                            }
+                        }
+
+                        size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
+
+                        // If we have hit a newline that is followed by a valid
+                        // terminator, then we need to return the content of the
+                        // heredoc here as string content. Then, the next time a
+                        // token is lexed, it will match again and return the
+                        // end of the heredoc.
+                        if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
+                            if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
+                                *lex_mode->as.heredoc.common_whitespace = whitespace;
+                            }
+
+                            parser->current.end = breakpoint + 1;
+                            pm_token_buffer_flush(parser, &token_buffer);
+                            LEX(PM_TOKEN_STRING_CONTENT);
+                        }
+
+                        // Otherwise we hit a newline and it wasn't followed by
+                        // a terminator, so we can continue parsing.
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        break;
+                    }
+                    case '\\': {
+                        // If we hit an escape, then we need to skip past
+                        // however many characters the escape takes up. However
+                        // it's important that if \n or \r\n are escaped, we
+                        // stop looping before the newline and not after the
+                        // newline so that we can still potentially find the
+                        // terminator of the heredoc.
+                        parser->current.end = breakpoint + 1;
+
+                        // If we've hit the end of the file, then break out of
+                        // the loop by setting the breakpoint to NULL.
+                        if (parser->current.end == parser->end) {
+                            breakpoint = NULL;
+                            continue;
+                        }
+
+                        pm_token_buffer_escape(parser, &token_buffer);
+                        uint8_t peeked = peek(parser);
+
+                        if (quote == PM_HEREDOC_QUOTE_SINGLE) {
+                            switch (peeked) {
+                                case '\r':
+                                    parser->current.end++;
+                                    if (peek(parser) != '\n') {
+                                        pm_token_buffer_push_byte(&token_buffer, '\\');
+                                        pm_token_buffer_push_byte(&token_buffer, '\r');
+                                        break;
+                                    }
+                                PRISM_FALLTHROUGH
+                                case '\n':
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_byte(&token_buffer, '\n');
+                                    token_buffer.cursor = parser->current.end + 1;
+                                    breakpoint = parser->current.end;
+                                    continue;
+                                default:
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_escaped(&token_buffer, parser);
+                                    break;
+                            }
+                        } else {
+                            switch (peeked) {
+                                case '\r':
+                                    parser->current.end++;
+                                    if (peek(parser) != '\n') {
+                                        pm_token_buffer_push_byte(&token_buffer, '\r');
+                                        break;
+                                    }
+                                PRISM_FALLTHROUGH
+                                case '\n':
+                                    // If we are in a tilde here, we should
+                                    // break out of the loop and return the
+                                    // string content.
+                                    if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
+                                        const uint8_t *end = parser->current.end;
+
+                                        if (parser->heredoc_end == NULL) {
+                                            pm_newline_list_append(&parser->newline_list, end);
+                                        }
+
+                                        // Here we want the buffer to only
+                                        // include up to the backslash.
+                                        parser->current.end = breakpoint;
+                                        pm_token_buffer_flush(parser, &token_buffer);
+
+                                        // Now we can advance the end of the
+                                        // token past the newline.
+                                        parser->current.end = end + 1;
+                                        lex_mode->as.heredoc.line_continuation = true;
+                                        LEX(PM_TOKEN_STRING_CONTENT);
+                                    }
+
+                                    was_line_continuation = true;
+                                    token_buffer.cursor = parser->current.end + 1;
+                                    breakpoint = parser->current.end;
+                                    continue;
+                                default:
+                                    escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
+                                    break;
+                            }
+                        }
+
+                        token_buffer.cursor = parser->current.end;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                        break;
+                    }
+                    case '#': {
+                        pm_token_type_t type = lex_interpolation(parser, breakpoint);
+
+                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                            // If we haven't returned at this point then we had
+                            // something that looked like an interpolated class
+                            // or instance variable like "#@" but wasn't
+                            // actually. In this case we'll just skip to the
+                            // next breakpoint.
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
+                            break;
+                        }
+
+                        if (type == PM_TOKEN_STRING_CONTENT) {
+                            pm_token_buffer_flush(parser, &token_buffer);
+                        }
+
+                        LEX(type);
+                    }
+                    default:
+                        assert(false && "unreachable");
+                }
+
+                was_line_continuation = false;
+            }
+
+            if (parser->current.end > parser->current.start) {
+                parser->current.end = parser->end;
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
+
+            // If we've hit the end of the string, then this is an unterminated
+            // heredoc. In that case we'll return a string content token.
+            parser->current.end = parser->end;
+            pm_token_buffer_flush(parser, &token_buffer);
+            LEX(PM_TOKEN_STRING_CONTENT);
+        }
+    }
+
+    assert(false && "unreachable");
+}
+
+#undef LEX
+
+/******************************************************************************/
+/* Parse functions                                                            */
+/******************************************************************************/
+
+/**
+ * These are the various precedence rules. Because we are using a Pratt parser,
+ * they are named binding power to represent the manner in which nodes are bound
+ * together in the stack.
+ *
+ * The levels are listed in ascending numeric order, from
+ * PM_BINDING_POWER_UNSET (0) up to PM_BINDING_POWER_MAX.
+ *
+ * We increment by 2 because we want to leave room for the infix operators to
+ * specify their associativity: the table of binding powers below uses
+ * "level + 1" as the right binding power of some operators, and that value must
+ * not collide with the next level.
+ */
+typedef enum {
+    PM_BINDING_POWER_UNSET =             0, // used to indicate this token cannot be used as an infix operator
+    PM_BINDING_POWER_STATEMENT =         2,
+    PM_BINDING_POWER_MODIFIER_RESCUE =   4, // rescue
+    PM_BINDING_POWER_MODIFIER =          6, // if unless until while
+    PM_BINDING_POWER_COMPOSITION =       8, // and or
+    PM_BINDING_POWER_NOT =              10, // not
+    PM_BINDING_POWER_MATCH =            12, // => in
+    PM_BINDING_POWER_DEFINED =          14, // defined?
+    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
+    PM_BINDING_POWER_ASSIGNMENT =       18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
+    PM_BINDING_POWER_TERNARY =          20, // ?:
+    PM_BINDING_POWER_RANGE =            22, // .. ...
+    PM_BINDING_POWER_LOGICAL_OR =       24, // ||
+    PM_BINDING_POWER_LOGICAL_AND =      26, // &&
+    PM_BINDING_POWER_EQUALITY =         28, // <=> == === != =~ !~
+    PM_BINDING_POWER_COMPARISON =       30, // > >= < <=
+    PM_BINDING_POWER_BITWISE_OR =       32, // | ^
+    PM_BINDING_POWER_BITWISE_AND =      34, // &
+    PM_BINDING_POWER_SHIFT =            36, // << >>
+    PM_BINDING_POWER_TERM =             38, // + -
+    PM_BINDING_POWER_FACTOR =           40, // * / %
+    PM_BINDING_POWER_UMINUS =           42, // -@
+    PM_BINDING_POWER_EXPONENT =         44, // **
+    PM_BINDING_POWER_UNARY =            46, // ! ~ +@
+    PM_BINDING_POWER_INDEX =            48, // [] []=
+    PM_BINDING_POWER_CALL =             50, // :: . &.
+    PM_BINDING_POWER_MAX =              52  // greater than every operator level above
+} pm_binding_power_t;
+
+/**
+ * This struct represents a set of binding powers used for a given token. They
+ * are combined in this way to make it easier to represent associativity: an
+ * operator's left and right binding powers may differ by one.
+ */
+typedef struct {
+    /** The left binding power. */
+    pm_binding_power_t left;
+
+    /** The right binding power. */
+    pm_binding_power_t right;
+
+    /** Whether or not this token can be used as a binary operator. */
+    bool binary;
+
+    /**
+     * Whether or not this token can be used as a non-associative binary
+     * operator. Non-associative operators (e.g. in and =>) need special
+     * treatment in parse_expression.
+     */
+    bool nonassoc;
+} pm_binding_powers_t;
+
+// Shorthand initializers for the entries of the binding powers table below.
+// Each one expands to a braced { left, right, binary, nonassoc } initializer
+// for a pm_binding_powers_t.
+
+// Assignment operators: left power UNARY, right power ASSIGNMENT.
+#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
+// Binary operator whose right power is one higher than its left power.
+#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
+// Binary operator whose left and right powers are equal.
+#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
+// Same powers as LEFT_ASSOCIATIVE, but additionally flagged as nonassoc.
+#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
+// Same powers as RIGHT_ASSOCIATIVE, but not usable as a binary operator.
+#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
+
+/**
+ * The binding powers for each token type, indexed by the token type itself.
+ * Token types that are not listed here are zero-initialized, which leaves
+ * their left binding power as PM_BINDING_POWER_UNSET.
+ */
+pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
+    // rescue (left power MODIFIER_RESCUE, right power COMPOSITION)
+    [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
+
+    // if unless until while
+    [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
+    [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
+    [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
+    [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
+
+    // and or
+    [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
+    [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
+
+    // => in
+    [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
+    [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
+
+    // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
+    [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
+    [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
+
+    // ?:
+    [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
+
+    // .. ... (the unary forms use the LOGICAL_OR power, not the RANGE power)
+    [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
+    [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
+    [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
+    [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
+
+    // ||
+    [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
+
+    // &&
+    [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
+
+    // != !~ == === =~ <=>
+    [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
+    [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
+    [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
+    [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
+    [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
+    [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
+
+    // > >= < <=
+    [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
+    [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
+    [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
+    [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
+
+    // ^ |
+    [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
+    [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
+
+    // &
+    [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
+
+    // >> <<
+    [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
+    [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
+
+    // - +
+    [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
+    [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
+
+    // % / * (and the unary form of *, which is not a binary operator)
+    [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
+    [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
+    [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
+    [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
+
+    // -@ (UMINUS_NUM uses PM_BINDING_POWER_MAX as its right power)
+    [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
+    [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
+
+    // ** (the unary form uses the UNARY power, not the EXPONENT power)
+    [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
+    [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
+
+    // ! ~ +@
+    [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
+    [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
+    [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
+
+    // [
+    [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
+
+    // :: . &.
+    [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
+    [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
+    [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
+};
+
+// The initializer shorthands are only meaningful for the binding powers table
+// above, so undefine every one of them (including NON_ASSOCIATIVE) to keep
+// them from leaking into the rest of the file.
+#undef BINDING_POWER_ASSIGNMENT
+#undef LEFT_ASSOCIATIVE
+#undef RIGHT_ASSOCIATIVE
+#undef NON_ASSOCIATIVE
+#undef RIGHT_ASSOCIATIVE_UNARY
+
+/**
+ * Returns true if the current token is of the given type.
+ *
+ * This only inspects parser->current.type; the parser is not advanced.
+ */
+static inline bool
+match1(const pm_parser_t *parser, pm_token_type_t type) {
+    return parser->current.type == type;
+}
+
+/**
+ * Checks the current token against two candidate types without consuming it.
+ * Returns true if it is one of them.
+ */
+static inline bool
+match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
+    const pm_token_type_t current = parser->current.type;
+    return current == type1 || current == type2;
+}
+
+/**
+ * Checks the current token against three candidate types without consuming
+ * it. Returns true if it is one of them.
+ */
+static inline bool
+match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
+    const pm_token_type_t current = parser->current.type;
+    return current == type1 || current == type2 || current == type3;
+}
+
+/**
+ * Checks the current token against four candidate types without consuming it.
+ * Returns true if it is one of them.
+ */
+static inline bool
+match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
+    const pm_token_type_t current = parser->current.type;
+    return current == type1 || current == type2 || current == type3 || current == type4;
+}
+
+/**
+ * Checks the current token against seven candidate types without consuming
+ * it. Returns true if it is one of them.
+ */
+static inline bool
+match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
+    const pm_token_type_t current = parser->current.type;
+
+    return (
+        current == type1 ||
+        current == type2 ||
+        current == type3 ||
+        current == type4 ||
+        current == type5 ||
+        current == type6 ||
+        current == type7
+    );
+}
+
+/**
+ * Checks the current token against eight candidate types without consuming
+ * it. Returns true if it is one of them.
+ */
+static inline bool
+match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
+    const pm_token_type_t current = parser->current.type;
+
+    return (
+        current == type1 ||
+        current == type2 ||
+        current == type3 ||
+        current == type4 ||
+        current == type5 ||
+        current == type6 ||
+        current == type7 ||
+        current == type8
+    );
+}
+
+/**
+ * Consume the current token if (and only if) it has the given type. Returns
+ * true when a token was consumed, in which case the parser has been lexed
+ * forward by one token, and false otherwise. For example:
+ *
+ *     if (accept1(parser, PM_TOKEN_COLON)) { ... }
+ */
+static bool
+accept1(pm_parser_t *parser, pm_token_type_t type) {
+    if (!match1(parser, type)) return false;
+
+    parser_lex(parser);
+    return true;
+}
+
+/**
+ * Consume the current token if it has either of the two given types. Returns
+ * true when a token was consumed, in which case the parser has been lexed
+ * forward by one token, and false otherwise.
+ */
+static inline bool
+accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
+    if (!match2(parser, type1, type2)) return false;
+
+    parser_lex(parser);
+    return true;
+}
+
+/**
+ * Require a token of the given type at the current position. For example,
+ * when parsing a BEGIN block a { is expected immediately after the keyword, so
+ * the caller would use this function to say that the token should be there.
+ *
+ * When the token is present it is consumed. When it is not, an error with the
+ * given diagnostic is added at the end of the previous token (to indicate that
+ * the tree is not valid), and the previous token is turned into a zero-width
+ * PM_TOKEN_MISSING token at that same location. This allows the caller to
+ * recover from the absent token and continue parsing.
+ */
+static void
+expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
+    if (!accept1(parser, type)) {
+        const uint8_t *missing_at = parser->previous.end;
+        pm_parser_err(parser, missing_at, missing_at, diag_id);
+
+        // Stand in for the absent token with an artificial, empty one.
+        parser->previous.start = missing_at;
+        parser->previous.type = PM_TOKEN_MISSING;
+    }
+}
+
+/**
+ * The two-type variant of expect1: the current token must have either of the
+ * given types. If it does not, an error is added at the end of the previous
+ * token and the previous token becomes a zero-width PM_TOKEN_MISSING token.
+ */
+static void
+expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
+    if (!accept2(parser, type1, type2)) {
+        const uint8_t *missing_at = parser->previous.end;
+        pm_parser_err(parser, missing_at, missing_at, diag_id);
+
+        // Stand in for the absent token with an artificial, empty one.
+        parser->previous.start = missing_at;
+        parser->previous.type = PM_TOKEN_MISSING;
+    }
+}
+
+/**
+ * A special expect1 that expects a heredoc terminator. If the current token is
+ * PM_TOKEN_HEREDOC_END it is consumed. Otherwise a heredoc terminator error is
+ * reported for the given identifier and the previous token becomes a
+ * zero-width PM_TOKEN_MISSING token.
+ *
+ * NOTE(review): this was previously described as also popping the lex mode;
+ * nothing here does that directly, so presumably it happens while lexing.
+ */
+static void
+expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
+    if (accept1(parser, PM_TOKEN_HEREDOC_END)) return;
+
+    pm_parser_err_heredoc_term(parser, ident_start, ident_length);
+
+    // Stand in for the absent terminator with an artificial, empty token.
+    parser->previous.start = parser->previous.end;
+    parser->previous.type = PM_TOKEN_MISSING;
+}
+
+/**
+ * A special expect1 for closing tokens. When the expected token is absent, the
+ * error is attached to the location of the given opening token rather than to
+ * the current position, so that the error points at the line with the opening
+ * token (e.g., `def`, `class`, `if`, `{`) rather than the end of the file.
+ *
+ * As with expect1, the previous token becomes a zero-width PM_TOKEN_MISSING
+ * token when the expected token was not found.
+ */
+static void
+expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
+    if (!accept1(parser, type)) {
+        pm_parser_err(parser, opening->start, opening->end, diag_id);
+
+        // Stand in for the absent token with an artificial, empty one.
+        parser->previous.start = parser->previous.end;
+        parser->previous.type = PM_TOKEN_MISSING;
+    }
+}
+
+// Forward declaration, so that the helpers below can call parse_expression
+// ahead of its definition.
+static pm_node_t *
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
+
+/**
+ * Parse an expression exactly as parse_expression would (every argument is
+ * forwarded unchanged), then run pm_assert_value_expression on the resulting
+ * node before returning it.
+ */
+static pm_node_t *
+parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    pm_node_t *expression = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+
+    pm_assert_value_expression(parser, expression);
+    return expression;
+}
+
+/**
+ * Decides whether an expression could begin at a token of the given type. It
+ * is used in contexts where an expression is optional.
+ *
+ * For example, once the operator of a range has been lexed, we need to know
+ * whether to attempt to parse an expression on its right-hand side. Likewise,
+ * after an identifier or a method call without parentheses, the next token may
+ * or may not be the start of an argument.
+ *
+ * Generated CRuby parsers would resolve this with lookahead and potentially
+ * backtracking. Here the decision is made from the type of a single token
+ * only. This may not hold up in every case and may need to be refactored
+ * later, but it appears to work for now.
+ */
+static inline bool
+token_begins_expression_p(pm_token_type_t type) {
+    switch (type) {
+        // These unary tokens may have binding powers associated with them so
+        // that they can be placed correctly into the precedence order, but
+        // they nonetheless begin an expression.
+        case PM_TOKEN_UCOLON_COLON:
+        case PM_TOKEN_UMINUS:
+        case PM_TOKEN_UMINUS_NUM:
+        case PM_TOKEN_UPLUS:
+        case PM_TOKEN_BANG:
+        case PM_TOKEN_TILDE:
+        case PM_TOKEN_UDOT_DOT:
+        case PM_TOKEN_UDOT_DOT_DOT:
+            return true;
+
+        // => and in are binary operators that should not be marked as
+        // beginning an expression.
+        case PM_TOKEN_EQUAL_GREATER:
+        case PM_TOKEN_KEYWORD_IN:
+        // The unary & cannot appear as a general operator, it only appears in
+        // certain circumstances.
+        case PM_TOKEN_UAMPERSAND:
+            return false;
+
+        // None of these tokens begin an expression. The default below relies
+        // on the binding powers table, which would give the wrong answer for
+        // them because they have no binding power. If one of them is ever
+        // given a binding power, it should be removed from this list and
+        // handled by the default instead.
+        case PM_TOKEN_BRACE_RIGHT:
+        case PM_TOKEN_BRACKET_RIGHT:
+        case PM_TOKEN_COLON:
+        case PM_TOKEN_COMMA:
+        case PM_TOKEN_EMBEXPR_END:
+        case PM_TOKEN_EOF:
+        case PM_TOKEN_LAMBDA_BEGIN:
+        case PM_TOKEN_KEYWORD_DO:
+        case PM_TOKEN_KEYWORD_DO_LOOP:
+        case PM_TOKEN_KEYWORD_END:
+        case PM_TOKEN_KEYWORD_ELSE:
+        case PM_TOKEN_KEYWORD_ELSIF:
+        case PM_TOKEN_KEYWORD_ENSURE:
+        case PM_TOKEN_KEYWORD_THEN:
+        case PM_TOKEN_KEYWORD_RESCUE:
+        case PM_TOKEN_KEYWORD_WHEN:
+        case PM_TOKEN_NEWLINE:
+        case PM_TOKEN_PARENTHESIS_RIGHT:
+        case PM_TOKEN_SEMICOLON:
+            assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
+            return false;
+
+        default:
+            break;
+    }
+
+    // Any other token begins an expression exactly when it has no left
+    // binding power, i.e. when it cannot be used as an infix operator.
+    return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
+}
+
+/**
+ * Parse a value expression with the given binding power, optionally prefixed
+ * by the * operator.
+ *
+ * When the prefix is present, the operand is parsed with command calls
+ * disallowed and one level deeper, and the result is wrapped in a splat node.
+ * Otherwise this is a plain parse_value_expression call that does not accept
+ * labels.
+ */
+static pm_node_t *
+parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    if (!accept1(parser, PM_TOKEN_USTAR)) {
+        return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
+    }
+
+    // The * was just consumed, so it is now the previous token.
+    pm_token_t star = parser->previous;
+    pm_node_t *operand = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+
+    return UP(pm_splat_node_create(parser, &star, operand));
+}
+
+/**
+ * The visitor callback used by pm_node_unreference. The data pointer is the
+ * parser. Depending on the type of the visited node, the node is removed from
+ * the parser's list of block exits or from the current scope's list of
+ * implicit parameters. Returns false for local variable reads and true for
+ * every other node.
+ */
+static bool
+pm_node_unreference_each(const pm_node_t *node, void *data) {
+    pm_parser_t *parser = (pm_parser_t *) data;
+
+    switch (PM_NODE_TYPE(node)) {
+        /* When we are about to destroy a set of nodes that could potentially
+         * contain block exits for the current scope, we need to check if they
+         * are contained in the list of block exits and remove them if they are.
+         */
+        case PM_BREAK_NODE:
+        case PM_NEXT_NODE:
+        case PM_REDO_NODE: {
+            for (size_t position = 0; position < parser->current_block_exits->size; position++) {
+                if (parser->current_block_exits->nodes[position] != node) continue;
+
+                /* Close the gap by shifting down whatever follows the node. */
+                const size_t trailing = parser->current_block_exits->size - position - 1;
+                if (trailing > 0) {
+                    memmove(
+                        &parser->current_block_exits->nodes[position],
+                        &parser->current_block_exits->nodes[position + 1],
+                        trailing * sizeof(pm_node_t *)
+                    );
+                }
+
+                parser->current_block_exits->size--;
+                break;
+            }
+
+            /* Note returning true here (whether or not the node was found)
+             * because these nodes could have arguments that are themselves
+             * block exits. */
+            return true;
+        }
+        /* When an implicit local variable is written to or targeted, it becomes
+         * a regular, named local variable. This branch removes it from the list
+         * of implicit parameters when that happens. */
+        case PM_LOCAL_VARIABLE_READ_NODE:
+        case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+            pm_node_list_t *implicit = &parser->current_scope->implicit_parameters;
+
+            size_t position = 0;
+            while (position < implicit->size && implicit->nodes[position] != node) {
+                position++;
+            }
+
+            if (position < implicit->size) {
+                /* If the node is not the last one in the list, we need to
+                 * shift the remaining nodes down to fill the gap. This is
+                 * extremely unlikely to happen. */
+                const size_t trailing = implicit->size - position - 1;
+                if (trailing > 0) {
+                    memmove(&implicit->nodes[position], &implicit->nodes[position + 1], trailing * sizeof(pm_node_t *));
+                }
+
+                implicit->size--;
+            }
+
+            return false;
+        }
+        default:
+            return true;
+    }
+}
+
+/**
+ * When we are about to destroy a set of nodes that could potentially be
+ * referenced by one or more lists on the parser, then remove them from those
+ * lists so we don't get a use-after-free.
+ *
+ * This visits the given node with pm_node_unreference_each as the callback
+ * and the parser as its data.
+ */
+static void
+pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
+    pm_visit_node(node, pm_node_unreference_each, parser);
+}
+
+/**
+ * Replace the method name stored in name_field with the name of the
+ * corresponding write method. For example, foo would be turned into foo=.
+ *
+ * If the buffer for the new name cannot be allocated, name_field is left
+ * untouched.
+ */
+static void
+parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
+    pm_constant_t *reader = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
+    const size_t reader_length = reader->length;
+    const size_t writer_length = reader_length + 1;
+
+    // The writer name is the reader name followed by a single =, built in a
+    // freshly allocated buffer.
+    uint8_t *writer = xcalloc(writer_length, sizeof(uint8_t));
+    if (writer == NULL) return;
+
+    memcpy(writer, reader->start, reader_length);
+    writer[reader_length] = '=';
+
+    // Now switch the name to the new string, handing the buffer over to the
+    // constant pool. The directive below silences the clang analyzer warning
+    // about a leak of the memory pointed to by `writer`.
+    // NOLINTNEXTLINE(clang-analyzer-*)
+    *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, writer, writer_length);
+}
+
+/**
+ * Certain expressions can never be written to. To provide a better experience
+ * each of them gets its own specific error message, and to keep as much
+ * information in the tree as possible the offending node is replaced by a
+ * local variable target that spans the same location. The original node is
+ * destroyed.
+ */
+static pm_node_t *
+parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
+    switch (PM_NODE_TYPE(target)) {
+        case PM_SOURCE_ENCODING_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING);
+            break;
+        case PM_FALSE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE);
+            break;
+        case PM_SOURCE_FILE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE);
+            break;
+        case PM_SOURCE_LINE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE);
+            break;
+        case PM_NIL_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL);
+            break;
+        case PM_SELF_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF);
+            break;
+        case PM_TRUE_NODE:
+            pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE);
+            break;
+        default:
+            break;
+    }
+
+    // The replacement is named after the source text of the original node and
+    // must be built before that node is destroyed.
+    pm_constant_id_t source_name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+    pm_local_variable_target_node_t *replacement = pm_local_variable_target_node_create(parser, &target->location, source_name, 0);
+
+    pm_node_destroy(parser, target);
+    return UP(replacement);
+}
+
+/**
+ * Convert the given node into a valid target node.
+ *
+ * @param multiple Whether or not this target is part of a larger set of
+ *   targets. If it is, then the &. operator is not allowed.
+ * @param splat_parent Whether or not this target is a child of a splat target.
+ *   If it is, then fewer patterns are allowed.
+ */
+static pm_node_t *
+parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
+    switch (PM_NODE_TYPE(target)) {
+        case PM_MISSING_NODE:
+            return target;
+        case PM_SOURCE_ENCODING_NODE:
+        case PM_FALSE_NODE:
+        case PM_SOURCE_FILE_NODE:
+        case PM_SOURCE_LINE_NODE:
+        case PM_NIL_NODE:
+        case PM_SELF_NODE:
+        case PM_TRUE_NODE: {
+            // In these special cases, we have specific error messages and we
+            // will replace them with local variable writes.
+            return parse_unwriteable_target(parser, target);
+        }
+        case PM_CLASS_VARIABLE_READ_NODE:
+            assert(sizeof(pm_class_variable_target_node_t) == sizeof(pm_class_variable_read_node_t));
+            target->type = PM_CLASS_VARIABLE_TARGET_NODE;
+            return target;
+        case PM_CONSTANT_PATH_NODE:
+            if (context_def_p(parser)) {
+                pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
+            }
+
+            assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
+            target->type = PM_CONSTANT_PATH_TARGET_NODE;
+
+            return target;
+        case PM_CONSTANT_READ_NODE:
+            if (context_def_p(parser)) {
+                pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
+            }
+
+            assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
+            target->type = PM_CONSTANT_TARGET_NODE;
+
+            return target;
+        case PM_BACK_REFERENCE_READ_NODE:
+        case PM_NUMBERED_REFERENCE_READ_NODE:
+            PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
+            return target;
+        case PM_GLOBAL_VARIABLE_READ_NODE:
+            assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
+            target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
+            return target;
+        case PM_LOCAL_VARIABLE_READ_NODE: {
+            if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+                PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
+                pm_node_unreference(parser, target);
+            }
+
+            const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
+            uint32_t name = cast->name;
+            uint32_t depth = cast->depth;
+            pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
+
+            assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
+            target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
+
+            return target;
+        }
+        case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+            pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+            pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
+
+            pm_node_unreference(parser, target);
+            pm_node_destroy(parser, target);
+
+            return node;
+        }
+        case PM_INSTANCE_VARIABLE_READ_NODE:
+            assert(sizeof(pm_instance_variable_target_node_t) == sizeof(pm_instance_variable_read_node_t));
+            target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
+            return target;
+        case PM_MULTI_TARGET_NODE:
+            if (splat_parent) {
+                // Multi target is not accepted in all positions. If this is one
+                // of them, then we need to add an error.
+                pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            }
+
+            return target;
+        case PM_SPLAT_NODE: {
+            pm_splat_node_t *splat = (pm_splat_node_t *) target;
+
+            if (splat->expression != NULL) {
+                splat->expression = parse_target(parser, splat->expression, multiple, true);
+            }
+
+            return UP(splat);
+        }
+        case PM_CALL_NODE: {
+            pm_call_node_t *call = (pm_call_node_t *) target;
+
+            // If we have no arguments to the call node and we need this to be a
+            // target then this is either a method call or a local variable
+            // write.
+            if (
+                (call->message_loc.start != NULL) &&
+                (call->message_loc.end[-1] != '!') &&
+                (call->message_loc.end[-1] != '?') &&
+                (call->opening_loc.start == NULL) &&
+                (call->arguments == NULL) &&
+                (call->block == NULL)
+            ) {
+                if (call->receiver == NULL) {
+                    // When we get here, we have a local variable write, because it
+                    // was previously marked as a method call but now we have an =.
+                    // This looks like:
+                    //
+                    //     foo = 1
+                    //
+                    // When it was parsed in the prefix position, foo was seen as a
+                    // method call with no receiver and no arguments. Now we have an
+                    // =, so we know it's a local variable write.
+                    const pm_location_t message_loc = call->message_loc;
+
+                    pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
+                    pm_node_destroy(parser, target);
+
+                    return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
+                }
+
+                if (peek_at(parser, call->message_loc.start) == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+                    if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
+                        pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
+                    }
+
+                    parse_write_name(parser, &call->name);
+                    return UP(pm_call_target_node_create(parser, call));
+                }
+            }
+
+            // If there is no call operator and the message is "[]" then this is
+            // an aref expression, and we can transform it into an aset
+            // expression.
+            if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
+                return UP(pm_index_target_node_create(parser, call));
+            }
+        }
+        PRISM_FALLTHROUGH
+        default:
+            // In this case we have a node that we don't know how to convert
+            // into a target. We need to treat it as an error. For now, we'll
+            // mark it as an error and just skip right past it.
+            pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            return target;
+    }
+}
+
+/**
+ * Convert the given node into a write target and check that the token that
+ * follows it is one that is allowed to come after a target.
+ */
+static pm_node_t *
+parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
+    pm_node_t *converted = parse_target(parser, target, multiple, false);
+
+    // The target must be followed by an =, by `in` when parsing a for index,
+    // or by a ) when inside parentheses.
+    bool followed = match1(parser, PM_TOKEN_EQUAL);
+    if (!followed && context_p(parser, PM_CONTEXT_FOR_INDEX)) followed = match1(parser, PM_TOKEN_KEYWORD_IN);
+    if (!followed && context_p(parser, PM_CONTEXT_PARENS)) followed = match1(parser, PM_TOKEN_PARENTHESIS_RIGHT);
+
+    if (!followed) {
+        pm_parser_err_node(parser, converted, PM_ERR_WRITE_TARGET_UNEXPECTED);
+    }
+
+    return converted;
+}
+
+/**
+ * Wrap a constant write node in a shareable constant node when the value
+ * reported by pm_parser_scope_shareable_constant_get is anything other than
+ * PM_SCOPE_SHAREABLE_CONSTANT_NONE. Otherwise the write is returned untouched.
+ */
+static pm_node_t *
+parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
+    pm_shareable_constant_value_t setting = pm_parser_scope_shareable_constant_get(parser);
+
+    // Nothing is in effect, so hand the write back exactly as it came in.
+    if (setting == PM_SCOPE_SHAREABLE_CONSTANT_NONE) return write;
+
+    return UP(pm_shareable_constant_node_create(parser, write, setting));
+}
+
+/**
+ * Convert the given target node into a write node that assigns value to it.
+ */
+static pm_node_t *
+parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
+    switch (PM_NODE_TYPE(target)) {
+        case PM_MISSING_NODE:
+            pm_node_destroy(parser, value); // the missing target is returned as-is, so value is dropped
+            return target;
+        case PM_CLASS_VARIABLE_READ_NODE: {
+            pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
+            pm_node_destroy(parser, target);
+            return UP(node);
+        }
+        case PM_CONSTANT_PATH_NODE: {
+            pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
+
+            if (context_def_p(parser)) {
+                pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
+            }
+
+            return parse_shareable_constant_write(parser, node); // note: target is not destroyed in this case
+        }
+        case PM_CONSTANT_READ_NODE: {
+            pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
+
+            if (context_def_p(parser)) {
+                pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
+            }
+
+            pm_node_destroy(parser, target);
+            return parse_shareable_constant_write(parser, node);
+        }
+        case PM_BACK_REFERENCE_READ_NODE:
+        case PM_NUMBERED_REFERENCE_READ_NODE:
+            PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
+            PRISM_FALLTHROUGH // the error is recorded, but a global variable write is still built
+        case PM_GLOBAL_VARIABLE_READ_NODE: {
+            pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
+            pm_node_destroy(parser, target);
+            return UP(node);
+        }
+        case PM_LOCAL_VARIABLE_READ_NODE: {
+            pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
+
+            pm_constant_id_t name = local_read->name;
+            pm_location_t name_loc = target->location; // copied because target is destroyed below
+
+            uint32_t depth = local_read->depth;
+            pm_scope_t *scope = pm_parser_scope_find(parser, depth);
+
+            if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+                pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
+                PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
+                pm_node_unreference(parser, target);
+            }
+
+            pm_locals_unread(&scope->locals, name);
+            pm_node_destroy(parser, target);
+
+            return UP(pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator));
+        }
+        case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+            pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+            pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
+
+            pm_node_unreference(parser, target);
+            pm_node_destroy(parser, target);
+
+            return node;
+        }
+        case PM_INSTANCE_VARIABLE_READ_NODE: {
+            pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
+            pm_node_destroy(parser, target);
+            return write_node;
+        }
+        case PM_MULTI_TARGET_NODE:
+            return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
+        case PM_SPLAT_NODE: {
+            pm_splat_node_t *splat = (pm_splat_node_t *) target;
+
+            if (splat->expression != NULL) {
+                splat->expression = parse_write(parser, splat->expression, operator, value);
+            }
+
+            pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+            pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
+
+            return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
+        }
+        case PM_CALL_NODE: {
+            pm_call_node_t *call = (pm_call_node_t *) target;
+
+            // If we have no arguments to the call node and we need this to be a
+            // target then this is either a method call or a local variable
+            // write.
+            if (
+                (call->message_loc.start != NULL) &&
+                (call->message_loc.end[-1] != '!') &&
+                (call->message_loc.end[-1] != '?') &&
+                (call->opening_loc.start == NULL) &&
+                (call->arguments == NULL) &&
+                (call->block == NULL)
+            ) {
+                if (call->receiver == NULL) {
+                    // When we get here, we have a local variable write, because it
+                    // was previously marked as a method call but now we have an =.
+                    // This looks like:
+                    //
+                    //     foo = 1
+                    //
+                    // When it was parsed in the prefix position, foo was seen as a
+                    // method call with no receiver and no arguments. Now we have an
+                    // =, so we know it's a local variable write.
+                    const pm_location_t message = call->message_loc; // copied because the call node is destroyed below
+
+                    pm_parser_local_add_location(parser, message.start, message.end, 0); // return value unused; the id is looked up again below
+                    pm_node_destroy(parser, target);
+
+                    pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
+                    target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator));
+
+                    pm_refute_numbered_parameter(parser, message.start, message.end);
+                    return target;
+                }
+
+                if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
+                    // When we get here, we have a method call, because it was
+                    // previously marked as a method call but now we have an =. This
+                    // looks like:
+                    //
+                    //     foo.bar = 1
+                    //
+                    // When it was parsed in the prefix position, foo.bar was seen as a
+                    // method call with no arguments. Now we have an =, so we know it's
+                    // a method call with an argument. In this case we will create the
+                    // arguments node, parse the argument, and add it to the list.
+                    pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
+                    call->arguments = arguments;
+
+                    pm_arguments_node_arguments_append(arguments, value);
+                    call->base.location.end = arguments->base.location.end;
+                    call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+                    parse_write_name(parser, &call->name);
+                    pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+
+                    return UP(call);
+                }
+            }
+
+            // If there is no call operator and the message is "[]" then this is
+            // an aref expression, and we can transform it into an aset
+            // expression.
+            if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
+                if (call->arguments == NULL) {
+                    call->arguments = pm_arguments_node_create(parser);
+                }
+
+                pm_arguments_node_arguments_append(call->arguments, value);
+                target->location.end = value->location.end;
+
+                // Replace the name with "[]=".
+                call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
+                call->equal_loc = PM_LOCATION_TOKEN_VALUE(operator);
+
+                // Ensure that the arguments for []= don't contain keywords
+                pm_index_arguments_check(parser, call->arguments, call->block);
+                pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+
+                return target;
+            }
+
+            // If there are arguments on the call node, then it can't be a
+            // method call ending with = or a local variable write, so it must
+            // be a syntax error. In this case we'll fall through to our default
+            // handling. We need to free the value that we parsed because there
+            // is no way for us to attach it to the tree at this point.
+            //
+            // Since it is possible for the value to contain an implicit
+            // parameter somewhere in its subtree, we need to walk it and remove
+            // any implicit parameters from the list of implicit parameters for
+            // the current scope.
+            pm_node_unreference(parser, value);
+            pm_node_destroy(parser, value);
+        }
+        PRISM_FALLTHROUGH
+        default:
+            // In this case we have a node that we don't know how to convert into a
+            // target. We need to treat it as an error. For now, we'll mark it as an
+            // error and just skip right past it.
+            pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            return target;
+    }
+}
+
+/**
+ * Some expressions can never be assigned to. For the keywords handled below a
+ * dedicated error is reported on the = token. Whatever the target was, it is
+ * replaced by a local variable write named after its source location so that
+ * the value remains in the tree.
+ */
+static pm_node_t *
+parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
+    switch (PM_NODE_TYPE(target)) {
+        case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
+        case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
+        case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
+        case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
+        case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
+        case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
+        case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
+        default: break;
+    }
+
+    const pm_location_t *location = &target->location;
+    pm_constant_id_t name = pm_parser_local_add_location(parser, location->start, location->end, 1);
+    pm_node_t *write = UP(pm_local_variable_write_node_create(parser, name, 0, value, location, equals));
+    pm_node_destroy(parser, target);
+    return write;
+}
+
+/**
+ * Parse a list of targets for assignment. This is used in the case of a for
+ * loop or a multi-assignment. For example, in the following code:
+ *
+ *     for foo, bar in baz
+ *         ^^^^^^^^
+ *
+ * The targets are `foo` and `bar`. This function always returns a multi-target
+ * node that holds every target that was parsed, starting with first_target.
+ */
+static pm_node_t *
+parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
+    bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE); // true once a splat target has been seen
+
+    pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
+    pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
+
+    while (accept1(parser, PM_TOKEN_COMMA)) {
+        if (accept1(parser, PM_TOKEN_USTAR)) {
+            // Here we have a splat operator. It can have a name or be
+            // anonymous. It can be the final target or be in the middle if
+            // there haven't been any others yet.
+            if (has_rest) {
+                pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
+            }
+
+            pm_token_t star_operator = parser->previous;
+            pm_node_t *name = NULL; // stays NULL when the splat is anonymous
+
+            if (token_begins_expression_p(parser->current.type)) {
+                name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                name = parse_target(parser, name, true, true);
+            }
+
+            pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
+            pm_multi_target_node_targets_append(parser, result, splat);
+            has_rest = true;
+        } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+            context_push(parser, PM_CONTEXT_MULTI_TARGET); // popped again once the target has been appended
+            pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+            target = parse_target(parser, target, true, false);
+
+            pm_multi_target_node_targets_append(parser, result, target);
+            context_pop(parser);
+        } else if (token_begins_expression_p(parser->current.type)) {
+            pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+            target = parse_target(parser, target, true, false);
+
+            pm_multi_target_node_targets_append(parser, result, target);
+        } else if (!match1(parser, PM_TOKEN_EOF)) {
+            // If we get here, then we have a trailing , in a multi target node.
+            // We'll add an implicit rest node to represent this.
+            pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+            pm_multi_target_node_targets_append(parser, result, rest);
+            break;
+        }
+    }
+
+    return UP(result);
+}
+
+/**
+ * Parse a list of targets starting at first_target, then check that the list
+ * is followed by a token that is allowed to come after it.
+ */
+static pm_node_t *
+parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
+    pm_node_t *targets = parse_targets(parser, first_target, binding_power, depth);
+
+    // A single newline token is accepted before the next token is inspected.
+    accept1(parser, PM_TOKEN_NEWLINE);
+
+    // Only an = or a ) may come next; anything else is reported on the list.
+    bool closed = match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT);
+    if (!closed) pm_parser_err_node(parser, targets, PM_ERR_WRITE_TARGET_UNEXPECTED);
+    return targets;
+}
+
+/**
+ * Parse a list of statements separated by newlines or semicolons. Returns NULL if there are none.
+ */
+static pm_statements_node_t *
+parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
+    // First, skip past any optional terminators that might be at the beginning
+    // of the statements.
+    while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
+
+    // If the current token ends this context, there are no statements to
+    // parse, so we can just return NULL.
+    if (context_terminator(context, &parser->current)) return NULL;
+
+    pm_statements_node_t *statements = pm_statements_node_create(parser);
+
+    // At this point we know we have at least one statement, and that it
+    // immediately follows the current token.
+    context_push(parser, context);
+
+    while (true) {
+        pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+        pm_statements_node_body_append(parser, statements, node, true);
+
+        // If we're recovering from a syntax error, then we need to stop parsing
+        // the statements now.
+        if (parser->recovering) {
+            // If this is the level of context where the recovery has happened,
+            // then we can mark the parser as done recovering.
+            if (context_terminator(context, &parser->current)) parser->recovering = false;
+            break;
+        }
+
+        // If we have a terminator, then we will parse all consecutive
+        // terminators and then continue parsing the statements list.
+        if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+            // Skip any further terminators, and stop if what follows them
+            // ends the current context.
+            while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+            if (context_terminator(context, &parser->current)) break;
+
+            // Now we can continue parsing the list of statements.
+            continue;
+        }
+
+        // At this point the statement was not followed by a newline or
+        // semicolon. That is only acceptable if we are at the end of the
+        // statements list for this context, in which case we break out of
+        // the loop.
+        if (context_terminator(context, &parser->current)) break;
+
+        // At this point, we have a syntax error, because the statement was not
+        // terminated by a newline or semicolon, and we're not at the end of the
+        // statements list. Ideally we should scan forward to determine if we
+        // should insert a missing terminator or break out of parsing the
+        // statements list at this point.
+        //
+        // We don't have that yet, so instead we'll do a more naive approach. If
+        // we were unable to parse an expression, then we will skip past this
+        // token and continue parsing the statements list. Otherwise we'll add
+        // an error and continue parsing the statements list.
+        if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
+            parser_lex(parser);
+
+            // If we are at the end of the file, then we need to stop parsing
+            // the statements entirely at this point. Mark the parser as
+            // recovering, as we know that EOF closes the top-level context, and
+            // then break out of the loop.
+            if (match1(parser, PM_TOKEN_EOF)) {
+                parser->recovering = true;
+                break;
+            }
+
+            while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+            if (context_terminator(context, &parser->current)) break;
+        } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
+            // This is an inlined version of accept1 because the error that we
+            // want to add has varargs. If this happens again, we should
+            // probably extract a helper function.
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+            parser->previous.start = parser->previous.end; // collapse previous to a zero-width span at its end
+            parser->previous.type = PM_TOKEN_MISSING;
+        }
+    }
+
+    context_pop(parser);
+    bool last_value = true; // handed to pm_void_statements_check; false only for the ensure contexts below
+    switch (context) {
+        case PM_CONTEXT_BEGIN_ENSURE:
+        case PM_CONTEXT_DEF_ENSURE:
+            last_value = false;
+            break;
+        default:
+            break;
+    }
+    pm_void_statements_check(parser, statements, last_value);
+
+    return statements;
+}
+
+/**
+ * Record a hash key in the given set of static literals. If
+ * pm_static_literals_add reports a duplicate, emit a duplicated-key warning.
+ */
+static void
+pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+    const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
+    if (duplicated == NULL) return;
+
+    // Render the duplicated literal as text so it can be put in the warning.
+    pm_buffer_t inspected = { 0 };
+    pm_static_literal_inspect(&inspected, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
+
+    pm_diagnostic_list_append_format(
+        &parser->warning_list,
+        duplicated->location.start,
+        duplicated->location.end,
+        PM_WARN_DUPLICATED_HASH_KEY,
+        (int) pm_buffer_length(&inspected),
+        pm_buffer_value(&inspected),
+        pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+    );
+
+    pm_buffer_free(&inspected);
+}
+
+/**
+ * Add a node to a set of static literals that holds the conditions of when
+ * clauses. If the node is a duplicate, then add an appropriate warning.
+ */
+static void
+pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+    pm_node_t *previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false);
+    if (previous == NULL) return;
+
+    // The warning spans the new node and is given the lines of both nodes.
+    pm_diagnostic_list_append_format(
+        &parser->warning_list,
+        node->location.start,
+        node->location.end,
+        PM_WARN_DUPLICATED_WHEN_CLAUSE,
+        pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
+        pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
+    );
+}
+
+/**
+ * Parse all of the elements of a hash into node. Return true if a double splat was found.
+ */
+static bool
+parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
+    assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
+    bool contains_keyword_splat = false;
+
+    while (true) {
+        pm_node_t *element;
+
+        switch (parser->current.type) {
+            case PM_TOKEN_USTAR_STAR: {
+                parser_lex(parser);
+                pm_token_t operator = parser->previous;
+                pm_node_t *value = NULL; // stays NULL when nothing that begins an expression follows the **
+
+                if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
+                    // If we're about to parse a nested hash that is being
+                    // pushed into this hash directly with **, then we want the
+                    // inner hash to share the static literals with the outer
+                    // hash.
+                    parser->current_hash_keys = literals;
+                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+                } else if (token_begins_expression_p(parser->current.type)) {
+                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+                } else {
+                    pm_parser_scope_forwarding_keywords_check(parser, &operator);
+                }
+
+                element = UP(pm_assoc_splat_node_create(parser, value, &operator));
+                contains_keyword_splat = true;
+                break;
+            }
+            case PM_TOKEN_LABEL: {
+                pm_token_t label = parser->current;
+                parser_lex(parser);
+
+                pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
+                pm_hash_key_static_literals_add(parser, literals, key);
+
+                pm_token_t operator = not_provided(parser);
+                pm_node_t *value = NULL;
+
+                if (token_begins_expression_p(parser->current.type)) {
+                    value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
+                } else {
+                    if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
+                        pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
+                        value = UP(pm_constant_read_node_create(parser, &constant));
+                    } else {
+                        int depth = -1; // shadows the depth parameter; -1 selects the variable call below
+                        pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
+
+                        if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
+                            PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
+                        } else {
+                            depth = pm_parser_local_depth(parser, &identifier);
+                        }
+
+                        if (depth == -1) {
+                            value = UP(pm_call_node_variable_call_create(parser, &identifier));
+                        } else {
+                            value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
+                        }
+                    }
+
+                    value->location.end++; // the tokens above ended at label.end - 1; presumably this restores label.end
+                    value = UP(pm_implicit_node_create(parser, value));
+                }
+
+                element = UP(pm_assoc_node_create(parser, key, &operator, value));
+                break;
+            }
+            default: {
+                pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
+
+                // Hash keys that are strings are automatically frozen. We will
+                // mark that here.
+                if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
+                    pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+                }
+
+                pm_hash_key_static_literals_add(parser, literals, key);
+
+                pm_token_t operator;
+                if (pm_symbol_node_label_p(key)) {
+                    operator = not_provided(parser);
+                } else {
+                    expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
+                    operator = parser->previous;
+                }
+
+                pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+                element = UP(pm_assoc_node_create(parser, key, &operator, value));
+                break;
+            }
+        }
+
+        if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
+            pm_hash_node_elements_append((pm_hash_node_t *) node, element);
+        } else {
+            pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
+        }
+
+        // If there's no comma after the element, then we're done.
+        if (!accept1(parser, PM_TOKEN_COMMA)) break;
+
+        // If the next element starts with a label or a **, then we know we have
+        // another element in the hash, so we'll continue parsing.
+        if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
+
+        // Otherwise we need to check if the subsequent token begins an expression.
+        // If it does, then we'll continue parsing.
+        if (token_begins_expression_p(parser->current.type)) continue;
+
+        // Otherwise by default we will exit out of this loop.
+        break;
+    }
+
+    return contains_keyword_splat;
+}
+
+/**
+ * Decide whether an already-parsed argument can act as the first key of a bare
+ * hash inside an argument list.
+ *
+ * A symbol node that is a label qualifies immediately, and no token is
+ * consumed. A call node that has no opening location but does have arguments
+ * is rejected when those arguments carry the keywords or splat flag, or when
+ * the call has a block. In every remaining case the result is whether a =>
+ * token follows the argument; when it does, that token is consumed.
+ */
+static inline bool
+argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
+    if (pm_symbol_node_label_p(argument)) return true;
+
+    if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
+        pm_call_node_t *call = (pm_call_node_t *) argument;
+        bool unparenthesized_with_arguments = (call->opening_loc.start == NULL) && (call->arguments != NULL);
+
+        if (unparenthesized_with_arguments) {
+            if (call->block != NULL) return false;
+            if (PM_NODE_FLAG_P(call->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) return false;
+        }
+    }
+
+    return accept1(parser, PM_TOKEN_EQUAL_GREATER);
+}
+
+/**
+ * Push a single argument onto the argument list held by the given arguments
+ * structure. The underlying arguments node is created lazily the first time
+ * an argument is appended.
+ */
+static inline void
+parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
+    pm_arguments_node_t *list = arguments->arguments;
+
+    if (list == NULL) {
+        list = pm_arguments_node_create(parser);
+        arguments->arguments = list;
+    }
+
+    pm_arguments_node_arguments_append(list, argument);
+}
+
+/**
+ * Parse a list of arguments.
+ *
+ * Nothing is parsed, and the arguments structure is left untouched, when the
+ * current token is the terminator or EOF, when its left binding power is set
+ * but lower than PM_BINDING_POWER_RANGE, or when it terminates the current
+ * context.
+ *
+ * Otherwise arguments are parsed in a loop that dispatches on the current
+ * token type:
+ *
+ * - PM_TOKEN_USTAR_STAR or PM_TOKEN_LABEL: a keyword hash node is created,
+ *   filled in by parse_assocs, and appended. The arguments node is flagged as
+ *   containing keywords (and a keyword splat if parse_assocs reports one).
+ * - PM_TOKEN_UAMPERSAND: a block argument node is created. Its expression is
+ *   parsed only if the next token begins an expression; otherwise the scope is
+ *   checked for block forwarding. The first block argument is stored in
+ *   arguments->block, any later one is appended to the argument list.
+ * - PM_TOKEN_USTAR: a splat node is created. When the operator is directly
+ *   followed by a right parenthesis, comma, semicolon, or right bracket, the
+ *   splat has no expression and the scope is checked for positional
+ *   forwarding.
+ * - PM_TOKEN_UDOT_DOT_DOT with accepts_forwarding set: if the next token
+ *   begins an expression, a range node with no left side is built and handed
+ *   to the default case; otherwise a forwarding arguments node is appended and
+ *   arguments->has_forwarding is set. When accepts_forwarding is false the
+ *   token is handled by the default case.
+ * - Anything else: a value expression is parsed. If
+ *   argument_allowed_for_bare_hash accepts it, it becomes the first key of a
+ *   keyword hash whose value is parsed next, and further elements are parsed by
+ *   parse_assocs when a comma follows.
+ *
+ * Errors are reported, without aborting, for a second bare hash, a splat after
+ * a bare hash, a comma after a block argument, and any argument that follows
+ * forwarding arguments.
+ *
+ * When the terminator is not PM_TOKEN_EOF, a newline is accepted after each
+ * argument; a comma that follows such a newline is reported as an error.
+ *
+ * The loop stops at EOF, when the parsed argument is a missing node or the
+ * parser is recovering, when no comma follows the argument, when a comma
+ * follows a call that has no opening location but has both arguments and a
+ * block, or when the terminator follows a comma (a trailing comma).
+ *
+ * The depth is incremented by one for every nested parse call.
+ */
+static void
+parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
+    pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
+
+    // First we need to check if the next token is one that could be the start
+    // of an argument. If it's not, then we can just return.
+    if (
+        match2(parser, terminator, PM_TOKEN_EOF) ||
+        (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
+        context_terminator(parser->current_context->context, &parser->current)
+    ) {
+        return;
+    }
+
+    bool parsed_first_argument = false;
+    bool parsed_bare_hash = false;
+    bool parsed_block_argument = false;
+    bool parsed_forwarding_arguments = false;
+
+    while (!match1(parser, PM_TOKEN_EOF)) {
+        if (parsed_forwarding_arguments) {
+            pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
+        }
+
+        pm_node_t *argument = NULL;
+
+        switch (parser->current.type) {
+            case PM_TOKEN_USTAR_STAR:
+            case PM_TOKEN_LABEL: {
+                if (parsed_bare_hash) {
+                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
+                }
+
+                pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
+                argument = UP(hash);
+
+                pm_static_literals_t hash_keys = { 0 };
+                bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
+
+                parse_arguments_append(parser, arguments, argument);
+
+                pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+                if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+                pm_node_flag_set(UP(arguments->arguments), flags);
+
+                pm_static_literals_free(&hash_keys);
+                parsed_bare_hash = true;
+
+                break;
+            }
+            case PM_TOKEN_UAMPERSAND: {
+                parser_lex(parser);
+                pm_token_t operator = parser->previous;
+                pm_node_t *expression = NULL;
+
+                if (token_begins_expression_p(parser->current.type)) {
+                    expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+                } else {
+                    pm_parser_scope_forwarding_block_check(parser, &operator);
+                }
+
+                argument = UP(pm_block_argument_node_create(parser, &operator, expression));
+                if (parsed_block_argument) {
+                    parse_arguments_append(parser, arguments, argument);
+                } else {
+                    arguments->block = argument;
+                }
+
+                if (match1(parser, PM_TOKEN_COMMA)) {
+                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
+                }
+
+                parsed_block_argument = true;
+                break;
+            }
+            case PM_TOKEN_USTAR: {
+                parser_lex(parser);
+                pm_token_t operator = parser->previous;
+
+                if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
+                    pm_parser_scope_forwarding_positionals_check(parser, &operator);
+                    argument = UP(pm_splat_node_create(parser, &operator, NULL));
+                    if (parsed_bare_hash) {
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+                    }
+                } else {
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
+
+                    if (parsed_bare_hash) {
+                        pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+                    }
+
+                    argument = UP(pm_splat_node_create(parser, &operator, expression));
+                }
+
+                parse_arguments_append(parser, arguments, argument);
+                break;
+            }
+            case PM_TOKEN_UDOT_DOT_DOT: {
+                if (accepts_forwarding) {
+                    parser_lex(parser);
+
+                    if (token_begins_expression_p(parser->current.type)) {
+                        // If the token begins an expression then this ... was
+                        // not actually argument forwarding but was instead a
+                        // range.
+                        pm_token_t operator = parser->previous;
+                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+
+                        // If we parse a range, we need to validate that we
+                        // didn't accidentally violate the nonassoc rules of the
+                        // ... operator.
+                        if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
+                            pm_range_node_t *range = (pm_range_node_t *) right;
+                            pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
+                        }
+
+                        argument = UP(pm_range_node_create(parser, NULL, &operator, right));
+                    } else {
+                        pm_parser_scope_forwarding_all_check(parser, &parser->previous);
+                        if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
+                            pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
+                        }
+
+                        argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
+                        parse_arguments_append(parser, arguments, argument);
+                        pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
+                        arguments->has_forwarding = true;
+                        parsed_forwarding_arguments = true;
+                        break;
+                    }
+                }
+            }
+            PRISM_FALLTHROUGH
+            default: {
+                if (argument == NULL) { // already set only when the ... case above built a range
+                    argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+                }
+
+                bool contains_keywords = false;
+                bool contains_keyword_splat = false;
+
+                if (argument_allowed_for_bare_hash(parser, argument)){
+                    if (parsed_bare_hash) {
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
+                    }
+
+                    pm_token_t operator;
+                    if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { // the check above consumed a =>
+                        operator = parser->previous;
+                    } else {
+                        operator = not_provided(parser);
+                    }
+
+                    pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
+                    contains_keywords = true;
+
+                    // Create the set of static literals for this hash.
+                    pm_static_literals_t hash_keys = { 0 };
+                    pm_hash_key_static_literals_add(parser, &hash_keys, argument);
+
+                    // Finish parsing the one we are part way through.
+                    pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+                    argument = UP(pm_assoc_node_create(parser, argument, &operator, value));
+
+                    pm_keyword_hash_node_elements_append(bare_hash, argument);
+                    argument = UP(bare_hash);
+
+                    // Then parse more if we have a comma
+                    if (accept1(parser, PM_TOKEN_COMMA) && (
+                        token_begins_expression_p(parser->current.type) ||
+                        match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
+                    )) {
+                        contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
+                    }
+
+                    pm_static_literals_free(&hash_keys);
+                    parsed_bare_hash = true;
+                }
+
+                parse_arguments_append(parser, arguments, argument);
+
+                pm_node_flags_t flags = 0;
+                if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+                if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+                pm_node_flag_set(UP(arguments->arguments), flags);
+
+                break;
+            }
+        }
+
+        parsed_first_argument = true;
+
+        // If parsing the argument failed, we need to stop parsing arguments.
+        if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
+
+        // If the terminator of these arguments is not EOF, then we have a
+        // specific token we're looking for. In that case we can accept a
+        // newline here because it is not functioning as a statement terminator.
+        bool accepted_newline = false;
+        if (terminator != PM_TOKEN_EOF) {
+            accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
+        }
+
+        if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
+            // If we previously were on a comma and we just parsed a bare hash,
+            // then we want to continue parsing arguments. This is because the
+            // comma was grabbed up by the hash parser.
+        } else if (accept1(parser, PM_TOKEN_COMMA)) {
+            // If there was a comma, then we need to check if we also accepted a
+            // newline. If we did, then this is a syntax error.
+            if (accepted_newline) {
+                pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+            }
+
+            // If this is a command call and an argument takes a block,
+            // there can be no further arguments. For example,
+            // `foo(bar 1 do end, 2)` should be rejected.
+            if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
+                pm_call_node_t *call = (pm_call_node_t *) argument;
+                if (call->opening_loc.start == NULL && call->arguments != NULL && call->block != NULL) {
+                    pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+                    break;
+                }
+            }
+        } else {
+            // If there is no comma at the end of the argument list then we're
+            // done parsing arguments and can break out of this loop.
+            break;
+        }
+
+        // If we hit the terminator, then that means we have a trailing comma so
+        // we can accept that output as well.
+        if (match1(parser, terminator)) break;
+    }
+}
+
+/**
+ * Parse a parenthesized, destructured required parameter, as it can appear in
+ * method, block, and lambda declarations:
+ *
+ *     def foo((bar, baz))
+ *     end
+ *
+ * Each element of the group is either another parenthesized group (parsed
+ * recursively), a splat with an optional name, or a plain identifier. Every
+ * name that is parsed is checked with pm_parser_parameter_name_check and then
+ * added as a local.
+ */
+static pm_multi_target_node_t *
+parse_required_destructured_parameter(pm_parser_t *parser) {
+    expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
+
+    pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+    pm_multi_target_node_opening_set(multi_target, &parser->previous);
+
+    for (;;) {
+        // Seeing the closing parenthesis here, once targets have already been
+        // collected, means the list ended with a trailing comma. The grammar
+        // forbids that in this position (unlike other multi targets), so we
+        // record an implicit rest, report the error, and stop.
+        if (multi_target->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+            pm_node_t *implicit_rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+            pm_multi_target_node_targets_append(parser, multi_target, implicit_rest);
+            pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+            break;
+        }
+
+        pm_node_t *target;
+
+        if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+            // A nested group.
+            target = UP(parse_required_destructured_parameter(parser));
+        } else if (accept1(parser, PM_TOKEN_USTAR)) {
+            // A splat, whose name is optional.
+            pm_token_t splat_operator = parser->previous;
+            pm_node_t *splat_name = NULL;
+
+            if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+                pm_token_t identifier = parser->previous;
+                splat_name = UP(pm_required_parameter_node_create(parser, &identifier));
+                if (pm_parser_parameter_name_check(parser, &identifier)) {
+                    pm_node_flag_set_repeated_parameter(splat_name);
+                }
+                pm_parser_local_add_token(parser, &identifier, 1);
+            }
+
+            target = UP(pm_splat_node_create(parser, &splat_operator, splat_name));
+        } else {
+            // A plain required name.
+            expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
+            pm_token_t identifier = parser->previous;
+
+            target = UP(pm_required_parameter_node_create(parser, &identifier));
+            if (pm_parser_parameter_name_check(parser, &identifier)) {
+                pm_node_flag_set_repeated_parameter(target);
+            }
+            pm_parser_local_add_token(parser, &identifier, 1);
+        }
+
+        pm_multi_target_node_targets_append(parser, multi_target, target);
+
+        if (!accept1(parser, PM_TOKEN_COMMA)) break;
+    }
+
+    accept1(parser, PM_TOKEN_NEWLINE);
+    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
+    pm_multi_target_node_closing_set(multi_target, &parser->previous);
+
+    return multi_target;
+}
+
+/**
+ * The ordering states used while parsing method parameters. Parameter tokens
+ * are mapped onto these states, and update_parameter_state compares their
+ * numeric values: a token whose state is greater than the current state is
+ * reported as out of order, so the values shrink as a parameter list
+ * progresses.
+ */
+typedef enum {
+    /** Extra state for tokens that should not change the state. */
+    PM_PARAMETERS_NO_CHANGE = 0,
+
+    /** Once in this state, every further ordered parameter is rejected. */
+    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
+
+    /** The state for the keyword rest tokens. */
+    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
+
+    /** The state for label tokens. */
+    PM_PARAMETERS_ORDER_KEYWORDS = 3,
+
+    PM_PARAMETERS_ORDER_REST = 4,
+
+    /**
+     * The state for the star tokens, and the state entered when a named
+     * parameter follows an optional one.
+     */
+    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
+
+    /** The state for the equal token of an optional parameter. */
+    PM_PARAMETERS_ORDER_OPTIONAL = 6,
+
+    /** The state for identifiers and opening parentheses. */
+    PM_PARAMETERS_ORDER_NAMED = 7,
+
+    /** The greatest value, used before any ordered parameter has been seen. */
+    PM_PARAMETERS_ORDER_NONE = 8
+} pm_parameters_order_t;
+
+/**
+ * This matches parameters tokens with parameters state.
+ *
+ * The table is indexed by token type. Token types that have no explicit entry
+ * are zero-initialized, which is PM_PARAMETERS_NO_CHANGE. It is a read-only
+ * lookup table, so it is declared const to keep it from being modified.
+ */
+static const pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
+    [0] = PM_PARAMETERS_NO_CHANGE,
+    [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
+    [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
+    [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
+    [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
+    [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
+    [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
+    [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
+    [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
+    [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
+    [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
+    [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
+};
+
+/**
+ * Validate the token that introduces the next parameter against the ordering
+ * state reached so far, and advance that state when the token is acceptable.
+ *
+ * Tokens that map to PM_PARAMETERS_NO_CHANGE are always accepted and leave the
+ * state alone. When the token is out of order, an error is added to the parser
+ * (parsing is not stopped) and the state is left as it was.
+ *
+ * Returns true if the token was accepted, and false if an error was reported.
+ */
+static bool
+update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
+    const pm_parameters_order_t next = parameters_ordering[token->type];
+    if (next == PM_PARAMETERS_NO_CHANGE) return true;
+
+    const pm_parameters_order_t before = *current;
+
+    // A named parameter is allowed to follow an optional one. The first such
+    // parameter moves us into the "after optional" state, and further named
+    // parameters keep us there.
+    if (next == PM_PARAMETERS_ORDER_NAMED) {
+        if (before == PM_PARAMETERS_ORDER_OPTIONAL) {
+            *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
+            return true;
+        }
+
+        if (before == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
+            return true;
+        }
+    }
+
+    if (token->type == PM_TOKEN_USTAR && before == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
+        pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
+        return false;
+    }
+
+    if (token->type == PM_TOKEN_UDOT_DOT_DOT && before >= PM_PARAMETERS_ORDER_KEYWORDS_REST && before <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
+        pm_parser_err_token(parser, token, before == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
+        return false;
+    }
+
+    // Either nothing may follow the previous parameter, or this token belongs
+    // earlier in the list than the state we have already reached.
+    if (before == PM_PARAMETERS_ORDER_NOTHING_AFTER || next > before) {
+        pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
+        return false;
+    }
+
+    if (next < before) {
+        *current = next;
+    }
+
+    return true;
+}
+
+/**
+ * Parse a list of parameters on a method definition.
+ */
+static pm_parameters_node_t *
+parse_parameters(
+    pm_parser_t *parser,
+    pm_binding_power_t binding_power,
+    bool uses_parentheses,
+    bool allows_trailing_comma,
+    bool allows_forwarding_parameters,
+    bool accepts_blocks_in_defaults,
+    bool in_block,
+    uint16_t depth
+) {
+    pm_do_loop_stack_push(parser, false);
+
+    pm_parameters_node_t *params = pm_parameters_node_create(parser);
+    pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
+
+    while (true) {
+        bool parsing = true;
+
+        switch (parser->current.type) {
+            case PM_TOKEN_PARENTHESIS_LEFT: {
+                update_parameter_state(parser, &parser->current, &order);
+                pm_node_t *param = UP(parse_required_destructured_parameter(parser));
+
+                if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
+                    pm_parameters_node_requireds_append(params, param);
+                } else {
+                    pm_parameters_node_posts_append(params, param);
+                }
+                break;
+            }
+            case PM_TOKEN_UAMPERSAND:
+            case PM_TOKEN_AMPERSAND: {
+                update_parameter_state(parser, &parser->current, &order);
+                parser_lex(parser);
+
+                pm_token_t operator = parser->previous;
+                pm_token_t name;
+
+                bool repeated = false;
+                if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+                    name = parser->previous;
+                    repeated = pm_parser_parameter_name_check(parser, &name);
+                    pm_parser_local_add_token(parser, &name, 1);
+                } else {
+                    name = not_provided(parser);
+                    parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
+                }
+
+                pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
+                if (repeated) {
+                    pm_node_flag_set_repeated_parameter(UP(param));
+                }
+                if (params->block == NULL) {
+                    pm_parameters_node_block_set(params, param);
+                } else {
+                    pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_BLOCK_MULTI);
+                    pm_parameters_node_posts_append(params, UP(param));
+                }
+
+                break;
+            }
+            case PM_TOKEN_UDOT_DOT_DOT: {
+                if (!allows_forwarding_parameters) {
+                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+                }
+
+                bool succeeded = update_parameter_state(parser, &parser->current, &order);
+                parser_lex(parser);
+
+                parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
+                pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
+
+                if (params->keyword_rest != NULL) {
+                    // If we already have a keyword rest parameter, then we replace it with the
+                    // forwarding parameter and move the keyword rest parameter to the posts list.
+                    pm_node_t *keyword_rest = params->keyword_rest;
+                    pm_parameters_node_posts_append(params, keyword_rest);
+                    if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
+                    params->keyword_rest = NULL;
+                }
+
+                pm_parameters_node_keyword_rest_set(params, UP(param));
+                break;
+            }
+            case PM_TOKEN_CLASS_VARIABLE:
+            case PM_TOKEN_IDENTIFIER:
+            case PM_TOKEN_CONSTANT:
+            case PM_TOKEN_INSTANCE_VARIABLE:
+            case PM_TOKEN_GLOBAL_VARIABLE:
+            case PM_TOKEN_METHOD_NAME: {
+                parser_lex(parser);
+                switch (parser->previous.type) {
+                    case PM_TOKEN_CONSTANT:
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+                        break;
+                    case PM_TOKEN_INSTANCE_VARIABLE:
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
+                        break;
+                    case PM_TOKEN_GLOBAL_VARIABLE:
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
+                        break;
+                    case PM_TOKEN_CLASS_VARIABLE:
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
+                        break;
+                    case PM_TOKEN_METHOD_NAME:
+                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
+                        break;
+                    default: break;
+                }
+
+                if (parser->current.type == PM_TOKEN_EQUAL) {
+                    update_parameter_state(parser, &parser->current, &order);
+                } else {
+                    update_parameter_state(parser, &parser->previous, &order);
+                }
+
+                pm_token_t name = parser->previous;
+                bool repeated = pm_parser_parameter_name_check(parser, &name);
+                pm_parser_local_add_token(parser, &name, 1);
+
+                if (match1(parser, PM_TOKEN_EQUAL)) {
+                    pm_token_t operator = parser->current;
+                    context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
+                    parser_lex(parser);
+
+                    pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
+                    uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+
+                    if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
+                    pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
+                    if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
+
+                    pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
+
+                    if (repeated) {
+                        pm_node_flag_set_repeated_parameter(UP(param));
+                    }
+                    pm_parameters_node_optionals_append(params, param);
+
+                    // If the value of the parameter increased the number of
+                    // reads of that parameter, then we need to warn that we
+                    // have a circular definition.
+                    if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
+                    }
+
+                    context_pop(parser);
+
+                    // If parsing the value of the parameter resulted in error recovery,
+                    // then we can put a missing node in its place and stop parsing the
+                    // parameters entirely now.
+                    if (parser->recovering) {
+                        parsing = false;
+                        break;
+                    }
+                } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
+                    pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
+                    if (repeated) {
+                        pm_node_flag_set_repeated_parameter(UP(param));
+                    }
+                    pm_parameters_node_requireds_append(params, UP(param));
+                } else {
+                    pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
+                    if (repeated) {
+                        pm_node_flag_set_repeated_parameter(UP(param));
+                    }
+                    pm_parameters_node_posts_append(params, UP(param));
+                }
+
+                break;
+            }
+            case PM_TOKEN_LABEL: {
+                if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
+                update_parameter_state(parser, &parser->current, &order);
+
+                context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
+                parser_lex(parser);
+
+                pm_token_t name = parser->previous;
+                pm_token_t local = name;
+                local.end -= 1;
+
+                if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
+                    pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+                } else if (local.end[-1] == '!' || local.end[-1] == '?') {
+                    PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
+                }
+
+                bool repeated = pm_parser_parameter_name_check(parser, &local);
+                pm_parser_local_add_token(parser, &local, 1);
+
+                switch (parser->current.type) {
+                    case PM_TOKEN_COMMA:
+                    case PM_TOKEN_PARENTHESIS_RIGHT:
+                    case PM_TOKEN_PIPE: {
+                        context_pop(parser);
+
+                        pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
+                        if (repeated) {
+                            pm_node_flag_set_repeated_parameter(param);
+                        }
+
+                        pm_parameters_node_keywords_append(params, param);
+                        break;
+                    }
+                    case PM_TOKEN_SEMICOLON:
+                    case PM_TOKEN_NEWLINE: {
+                        context_pop(parser);
+
+                        if (uses_parentheses) {
+                            parsing = false;
+                            break;
+                        }
+
+                        pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
+                        if (repeated) {
+                            pm_node_flag_set_repeated_parameter(param);
+                        }
+
+                        pm_parameters_node_keywords_append(params, param);
+                        break;
+                    }
+                    default: {
+                        pm_node_t *param;
+
+                        if (token_begins_expression_p(parser->current.type)) {
+                            pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
+                            uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+
+                            if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
+                            pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
+                            if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
+
+                            if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+                                PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
+                            }
+
+                            param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
+                        }
+                        else {
+                            param = UP(pm_required_keyword_parameter_node_create(parser, &name));
+                        }
+
+                        if (repeated) {
+                            pm_node_flag_set_repeated_parameter(param);
+                        }
+
+                        context_pop(parser);
+                        pm_parameters_node_keywords_append(params, param);
+
+                        // If parsing the value of the parameter resulted in error recovery,
+                        // then we can put a missing node in its place and stop parsing the
+                        // parameters entirely now.
+                        if (parser->recovering) {
+                            parsing = false;
+                            break;
+                        }
+                    }
+                }
+
+                parser->in_keyword_arg = false;
+                break;
+            }
+            case PM_TOKEN_USTAR:
+            case PM_TOKEN_STAR: {
+                update_parameter_state(parser, &parser->current, &order);
+                parser_lex(parser);
+
+                pm_token_t operator = parser->previous;
+                pm_token_t name;
+                bool repeated = false;
+
+                if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+                    name = parser->previous;
+                    repeated = pm_parser_parameter_name_check(parser, &name);
+                    pm_parser_local_add_token(parser, &name, 1);
+                } else {
+                    name = not_provided(parser);
+                    parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
+                }
+
+                pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, &name));
+                if (repeated) {
+                    pm_node_flag_set_repeated_parameter(param);
+                }
+
+                if (params->rest == NULL) {
+                    pm_parameters_node_rest_set(params, param);
+                } else {
+                    pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
+                    pm_parameters_node_posts_append(params, param);
+                }
+
+                break;
+            }
+            case PM_TOKEN_STAR_STAR:
+            case PM_TOKEN_USTAR_STAR: {
+                pm_parameters_order_t previous_order = order;
+                update_parameter_state(parser, &parser->current, &order);
+                parser_lex(parser);
+
+                pm_token_t operator = parser->previous;
+                pm_node_t *param;
+
+                if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
+                    if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
+                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
+                    }
+
+                    param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
+                } else {
+                    pm_token_t name;
+
+                    bool repeated = false;
+                    if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+                        name = parser->previous;
+                        repeated = pm_parser_parameter_name_check(parser, &name);
+                        pm_parser_local_add_token(parser, &name, 1);
+                    } else {
+                        name = not_provided(parser);
+                        parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
+                    }
+
+                    param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, &name));
+                    if (repeated) {
+                        pm_node_flag_set_repeated_parameter(param);
+                    }
+                }
+
+                if (params->keyword_rest == NULL) {
+                    pm_parameters_node_keyword_rest_set(params, param);
+                } else {
+                    pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
+                    pm_parameters_node_posts_append(params, param);
+                }
+
+                break;
+            }
+            default:
+                if (parser->previous.type == PM_TOKEN_COMMA) {
+                    if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
+                        // If we get here, then we have a trailing comma in a
+                        // block parameter list.
+                        pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+
+                        if (params->rest == NULL) {
+                            pm_parameters_node_rest_set(params, param);
+                        } else {
+                            pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
+                            pm_parameters_node_posts_append(params, UP(param));
+                        }
+                    } else {
+                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+                    }
+                }
+
+                parsing = false;
+                break;
+        }
+
+        // If we hit some kind of issue while parsing the parameter, this would
+        // have been set to false. In that case, we need to break out of the
+        // loop.
+        if (!parsing) break;
+
+        bool accepted_newline = false;
+        if (uses_parentheses) {
+            accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
+        }
+
+        if (accept1(parser, PM_TOKEN_COMMA)) {
+            // If there was a comma, but we also accepted a newline, then this
+            // is a syntax error.
+            if (accepted_newline) {
+                pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+            }
+        } else {
+            // If there was no comma, then we're done parsing parameters.
+            break;
+        }
+    }
+
+    pm_do_loop_stack_pop(parser);
+
+    // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
+    if (params->base.location.start == params->base.location.end) {
+        pm_node_destroy(parser, UP(params));
+        return NULL;
+    }
+
+    return params;
+}
+
+/**
+ * Accepts a parser and returns the index, within the newline list, of the
+ * last newline in the file that was recorded before the current token.
+ */
+static size_t
+token_newline_index(const pm_parser_t *parser) {
+    if (parser->heredoc_end != NULL) {
+        // Unlikely case: the start of a heredoc has already been parsed, so
+        // the most recently recorded newline cannot be relied upon. Instead
+        // we go through the whole process of searching the newline list for
+        // the line that holds the start of the current token.
+        return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
+    }
+
+    // Common case: no heredoc end has been recorded, so the newline that was
+    // recorded most recently is the one we want, and it occupies the final
+    // slot of the newline list.
+    return parser->newline_list.size - 1;
+}
+
+/**
+ * Accepts a parser, a newline index, and a token and returns the column of
+ * the token, counted from the offset recorded for that newline index. Tabs are
+ * expanded out to the next tab stop. If break_on_non_space is true and a byte
+ * other than a space or a tab precedes the token, -1 is returned instead.
+ */
+static int64_t
+token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
+    const uint8_t *line_start = parser->start + parser->newline_list.offsets[newline_index];
+    const uint8_t *limit = token->start;
+
+    // A BOM (ef bb bf) at the very start of the source is not counted.
+    const bool starts_with_bom =
+        newline_index == 0 &&
+        parser->start[0] == 0xef &&
+        parser->start[1] == 0xbb &&
+        parser->start[2] == 0xbf;
+    if (starts_with_bom) line_start += 3;
+
+    int64_t width = 0;
+    for (const uint8_t *byte = line_start; byte < limit; byte++) {
+        if (*byte == '\t') {
+            // Jump to the next multiple of the tab size.
+            width = ((width / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
+        } else if (*byte == ' ' || !break_on_non_space) {
+            // Every other byte that we are allowed to pass is one column.
+            width++;
+        } else {
+            // Something other than a space or tab came before the token.
+            return -1;
+        }
+    }
+
+    return width;
+}
+
+/**
+ * Warns if the token in parser->current (the closing token) is not indented to match the opening token, which
+ * sits on the line given by opening_newline_index. if_after_else and allow_indent relax the checks as noted below.
+ */
+static void
+parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
+    // If these warnings are disabled (unlikely), then we can just return.
+    if (!parser->warn_mismatched_indentation) return;
+
+    // If the tokens are on the same line, we do not warn.
+    size_t closing_newline_index = token_newline_index(parser);
+    if (opening_newline_index == closing_newline_index) return;
+
+    // If the opening token has anything other than spaces or tabs before it,
+    // then we do not warn. This is unless we are matching up an `if`/`end` pair
+    // and the `if` immediately follows an `else` keyword.
+    int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
+    if (!if_after_else && (opening_column == -1)) return;
+
+    // Get a reference to the closing token off the current parser. This assumes
+    // that the caller has placed this in the correct position.
+    pm_token_t *closing_token = &parser->current;
+
+    // If the closing token has anything other than spaces or tabs before it, or the tokens are at the same indentation, we do not warn.
+    int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
+    if ((closing_column == -1) || (opening_column == closing_column)) return;
+
+    // If the closing column is greater than the opening column and we are
+    // allowing indentation, then we do not warn.
+    if (allow_indent && (closing_column > opening_column)) return;
+
+    // Otherwise, add a warning. The format arguments are the closing token's
+    PM_PARSER_WARN_FORMAT( // text, the opening token's text, and the opening line.
+        parser,
+        closing_token->start,
+        closing_token->end,
+        PM_WARN_INDENTATION_MISMATCH,
+        (int) (closing_token->end - closing_token->start),
+        (const char *) closing_token->start,
+        (int) (opening_token->end - opening_token->start),
+        (const char *) opening_token->start,
+        ((int32_t) opening_newline_index) + parser->start_line
+    );
+}
+
+typedef enum { // Selects which family of PM_CONTEXT_* values parse_rescues passes to parse_statements.
+    PM_RESCUES_BEGIN = 1, // PM_CONTEXT_BEGIN_RESCUE / _ELSE / _ENSURE
+    PM_RESCUES_BLOCK, // PM_CONTEXT_BLOCK_RESCUE / _ELSE / _ENSURE
+    PM_RESCUES_CLASS, // PM_CONTEXT_CLASS_RESCUE / _ELSE / _ENSURE
+    PM_RESCUES_DEF, // PM_CONTEXT_DEF_RESCUE / _ELSE / _ENSURE
+    PM_RESCUES_LAMBDA, // PM_CONTEXT_LAMBDA_RESCUE / _ELSE / _ENSURE
+    PM_RESCUES_MODULE, // PM_CONTEXT_MODULE_RESCUE / _ELSE / _ENSURE
+    PM_RESCUES_SCLASS // PM_CONTEXT_SCLASS_RESCUE / _ELSE / _ENSURE
+} pm_rescues_type_t;
+
+/**
+ * Parse any number of rescue clauses, then the optional else and ensure clauses and the end keyword, attaching
+ * each to parent_node. The rescue clauses form a linked list of rescue nodes pointing to each other from the top.
+ */
+static inline void
+parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
+    pm_rescue_node_t *current = NULL;
+
+    while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
+        if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+        parser_lex(parser);
+
+        pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
+
+        switch (parser->current.type) {
+            case PM_TOKEN_EQUAL_GREATER: {
+                // Here we have an immediate => after the rescue keyword, in which case
+                // we're going to have an empty list of exceptions to rescue (which
+                // implies StandardError).
+                parser_lex(parser);
+                pm_rescue_node_operator_set(rescue, &parser->previous);
+
+                pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+                reference = parse_target(parser, reference, false, false);
+
+                pm_rescue_node_reference_set(rescue, reference);
+                break;
+            }
+            case PM_TOKEN_NEWLINE:
+            case PM_TOKEN_SEMICOLON:
+            case PM_TOKEN_KEYWORD_THEN:
+                // Here we have a terminator for the rescue keyword, in which
+                // case we're going to just continue on.
+                break;
+            default: {
+                if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
+                    // Here we have something that could be an exception expression, so
+                    // we'll attempt to parse it here and any others delimited by commas.
+
+                    do {
+                        pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
+                        pm_rescue_node_exceptions_append(rescue, expression);
+
+                        // If we hit a newline, then this is the end of the rescue expression. We
+                        // can continue on to parse the statements.
+                        if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
+
+                        // If we hit a `=>` then we're going to parse the exception variable. Once
+                        // we've done that, we'll break out of the loop and parse the statements.
+                        if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+                            pm_rescue_node_operator_set(rescue, &parser->previous);
+
+                            pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+                            reference = parse_target(parser, reference, false, false);
+
+                            pm_rescue_node_reference_set(rescue, reference);
+                            break;
+                        }
+                    } while (accept1(parser, PM_TOKEN_COMMA));
+                }
+            }
+        }
+
+        if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+            if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+                rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+            }
+        } else {
+            expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
+            rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+        }
+
+        if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
+            pm_accepts_block_stack_push(parser, true);
+            pm_context_t context;
+
+            switch (type) {
+                case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
+                case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
+                case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
+                case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
+                case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
+                case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
+                case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
+                default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
+            }
+
+            pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
+            if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
+
+            pm_accepts_block_stack_pop(parser);
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+        }
+
+        if (current == NULL) {
+            pm_begin_node_rescue_clause_set(parent_node, rescue);
+        } else {
+            pm_rescue_node_subsequent_set(current, rescue);
+        }
+
+        current = rescue;
+    }
+
+    // The end node locations on rescue nodes will not be set correctly
+    // since we won't know the end until we've found all subsequent
+    // clauses. This sets the end location on all rescues once we know it.
+    if (current != NULL) {
+        const uint8_t *end_to_set = current->base.location.end;
+        pm_rescue_node_t *clause = parent_node->rescue_clause;
+
+        while (clause != NULL) {
+            clause->base.location.end = end_to_set;
+            clause = clause->subsequent;
+        }
+    }
+
+    pm_token_t else_keyword;
+    if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+        if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+        opening_newline_index = token_newline_index(parser);
+
+        else_keyword = parser->current;
+        opening = &else_keyword;
+
+        parser_lex(parser);
+        accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+
+        pm_statements_node_t *else_statements = NULL;
+        if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
+            pm_accepts_block_stack_push(parser, true);
+            pm_context_t context;
+
+            switch (type) {
+                case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
+                case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
+                case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
+                case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
+                case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
+                case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
+                case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
+                default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
+            }
+
+            else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
+            pm_accepts_block_stack_pop(parser);
+
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+        }
+
+        pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
+        pm_begin_node_else_clause_set(parent_node, else_clause);
+
+        // If we don't have a `current` rescue node, then this is a dangling
+        // else, and it's an error.
+        if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
+    }
+
+    if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
+        if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+        pm_token_t ensure_keyword = parser->current;
+
+        parser_lex(parser);
+        accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+
+        pm_statements_node_t *ensure_statements = NULL;
+        if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+            pm_accepts_block_stack_push(parser, true);
+            pm_context_t context;
+
+            switch (type) {
+                case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
+                case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
+                case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
+                case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
+                case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
+                case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
+                case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
+                default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ENSURE; break;
+            }
+
+            ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
+            pm_accepts_block_stack_pop(parser);
+
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+        }
+
+        pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
+        pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
+    }
+    // The end keyword is only recorded on the node here; it is not lexed past, or a missing token is recorded instead.
+    if (match1(parser, PM_TOKEN_KEYWORD_END)) {
+        if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
+        pm_begin_node_end_keyword_set(parent_node, &parser->current);
+    } else {
+        pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+        pm_begin_node_end_keyword_set(parent_node, &end_keyword);
+    }
+}
+
+/**
+ * Parse the rescue, else, and ensure clauses of a construct that has no
+ * explicit begin keyword (for example a class, module, def, etc.).
+ */
+static pm_begin_node_t *
+parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
+    pm_token_t implicit_begin = not_provided(parser);
+    pm_begin_node_t *begin_node = pm_begin_node_create(parser, &implicit_begin, statements);
+    parse_rescues(parser, opening_newline_index, opening, begin_node, type, (uint16_t) (depth + 1));
+
+    // The location start is assigned only after the clauses have been parsed.
+    begin_node->base.location.start = start;
+    return begin_node;
+}
+
+/**
+ * Parse a list of parameters and block-local variables (the names that follow a `;`) on a block definition.
+ */
+static pm_block_parameters_node_t *
+parse_block_parameters(
+    pm_parser_t *parser,
+    bool allows_trailing_comma,
+    const pm_token_t *opening,
+    bool is_lambda_literal,
+    bool accepts_blocks_in_defaults,
+    uint16_t depth
+) {
+    pm_parameters_node_t *parameters = NULL;
+    if (!match1(parser, PM_TOKEN_SEMICOLON)) { // a leading `;` means there are no ordinary parameters to parse
+        if (!is_lambda_literal) {
+            context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
+        }
+        parameters = parse_parameters(
+            parser,
+            is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
+            false,
+            allows_trailing_comma,
+            false,
+            accepts_blocks_in_defaults,
+            true,
+            (uint16_t) (depth + 1)
+        );
+        if (!is_lambda_literal) {
+            context_pop(parser);
+        }
+    }
+
+    pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
+    if ((opening->type != PM_TOKEN_NOT_PROVIDED)) { // block locals are only parsed when an opening token was provided
+        accept1(parser, PM_TOKEN_NEWLINE);
+
+        if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+            do {
+                switch (parser->current.type) { // report invalid names, but still lex past them so a local is recorded
+                    case PM_TOKEN_CONSTANT:
+                        pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+                        parser_lex(parser);
+                        break;
+                    case PM_TOKEN_INSTANCE_VARIABLE:
+                        pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
+                        parser_lex(parser);
+                        break;
+                    case PM_TOKEN_GLOBAL_VARIABLE:
+                        pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
+                        parser_lex(parser);
+                        break;
+                    case PM_TOKEN_CLASS_VARIABLE:
+                        pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
+                        parser_lex(parser);
+                        break;
+                    default:
+                        expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
+                        break;
+                }
+
+                bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
+                pm_parser_local_add_token(parser, &parser->previous, 1);
+
+                pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
+                if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
+
+                pm_block_parameters_node_append_local(block_parameters, local);
+            } while (accept1(parser, PM_TOKEN_COMMA));
+        }
+    }
+
+    return block_parameters;
+}
+
+/**
+ * Return true if a scope enclosing the current one, looking outward only until a closed scope is reached, is using numbered parameters.
+ */
+static bool
+outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
+    pm_scope_t *enclosing = parser->current_scope->previous;
+    while (enclosing != NULL && !enclosing->closed) {
+        if (enclosing->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
+        enclosing = enclosing->previous;
+    }
+    return false;
+}
+
+/**
+ * These are the names of the various numbered parameters (`_1` through `_9`,
+ * in order). We have them here so that when we insert them into the constant
+ * pool we can use a constant string and not have to allocate.
+ */
+static const char * const pm_numbered_parameter_names[] = {
+    "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
+};
+
+/**
+ * Return the node to use in the parameters field of a block-like (block or
+ * lambda) node: the ordinary parameters if given, else a numbered or `it` node
+ * built from the current scope's implicit parameters, or NULL otherwise.
+ */
+static pm_node_t *
+parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
+    pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+    // If we have ordinary parameters, then we will return them as the set of
+    // parameters.
+    if (parameters != NULL) {
+        // If we also have implicit parameters, then this is an error. Only the
+        if (implicit_parameters->size > 0) { // first implicit parameter is reported.
+            pm_node_t *node = implicit_parameters->nodes[0];
+
+            if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
+                pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
+            } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+                pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
+            } else {
+                assert(false && "unreachable");
+            }
+        }
+
+        return parameters;
+    }
+
+    // If we don't have any implicit parameters, then the set of parameters is
+    // NULL.
+    if (implicit_parameters->size == 0) {
+        return NULL;
+    }
+
+    // If we don't have ordinary parameters, then we now must validate our set
+    // of implicit parameters. We can only have numbered parameters or it, but
+    // they cannot be mixed.
+    uint8_t numbered_parameter = 0; // highest numbered parameter accepted so far (0 if none)
+    bool it_parameter = false;
+
+    for (size_t index = 0; index < implicit_parameters->size; index++) {
+        pm_node_t *node = implicit_parameters->nodes[index];
+
+        if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
+            if (it_parameter) {
+                pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
+            } else if (outer_scope_using_numbered_parameters_p(parser)) {
+                pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
+            } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
+                pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
+            } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+                numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
+            } else {
+                assert(false && "unreachable");
+            }
+        } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+            if (numbered_parameter > 0) {
+                pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
+            } else {
+                it_parameter = true;
+            }
+        }
+    }
+
+    if (numbered_parameter > 0) {
+        // Go through the parent scopes and mark them as being disallowed from
+        // using numbered parameters because this inner scope is using them.
+        for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
+            scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
+        }
+
+        const pm_location_t location = { .start = opening->start, .end = closing->end };
+        return UP(pm_numbered_parameters_node_create(parser, &location, numbered_parameter));
+    }
+
+    if (it_parameter) {
+        return UP(pm_it_parameters_node_create(parser, opening, closing));
+    }
+
+    return NULL; // every implicit parameter was rejected with an error above
+}
+
+/**
+ * Parse a block, including its parameters and body. The block's opening token is taken from parser->previous.
+ */
+static pm_block_node_t *
+parse_block(pm_parser_t *parser, uint16_t depth) {
+    pm_token_t opening = parser->previous;
+    accept1(parser, PM_TOKEN_NEWLINE);
+
+    pm_accepts_block_stack_push(parser, true);
+    pm_parser_scope_push(parser, false);
+
+    pm_block_parameters_node_t *block_parameters = NULL;
+
+    if (accept1(parser, PM_TOKEN_PIPE)) {
+        pm_token_t block_parameters_opening = parser->previous;
+        if (match1(parser, PM_TOKEN_PIPE)) { // an empty `||` parameter list
+            block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
+            parser->command_start = true;
+            parser_lex(parser);
+        } else {
+            block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
+            accept1(parser, PM_TOKEN_NEWLINE);
+            parser->command_start = true;
+            expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
+        }
+
+        pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+    }
+
+    accept1(parser, PM_TOKEN_NEWLINE);
+    pm_node_t *statements = NULL;
+
+    if (opening.type == PM_TOKEN_BRACE_LEFT) {
+        if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
+            statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
+        }
+
+        expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
+    } else {
+        if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+            if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
+                pm_accepts_block_stack_push(parser, true);
+                statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
+                pm_accepts_block_stack_pop(parser);
+            }
+
+            if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { // a NULL opening is passed, which skips the indentation warnings
+                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+                statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
+            }
+        }
+
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
+    }
+
+    pm_constant_id_list_t locals; // the locals and parameters are read from the block's scope before it is popped
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
+    pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
+
+    pm_parser_scope_pop(parser);
+    pm_accepts_block_stack_pop(parser);
+
+    return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
+}
+
+/**
+ * Parse a parenthesized argument list or, if accepts_command_call, a bare
+ * command-call argument list, then a trailing block if accepts_block. It
+ * returns true if it found any piece (parentheses, arguments, or blocks).
+ */
+static bool
+parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
+    bool found = false;
+
+    if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+        found |= true;
+        arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+
+        if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+            arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+        } else {
+            pm_accepts_block_stack_push(parser, true);
+            parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
+
+            if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
+                parser->previous.start = parser->previous.end; // collapse previous to a zero-width token before it is used for closing_loc
+                parser->previous.type = PM_TOKEN_MISSING;
+            }
+
+            pm_accepts_block_stack_pop(parser);
+            arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+        }
+    } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
+        found |= true;
+        pm_accepts_block_stack_push(parser, false);
+
+        // If we get here, then the subsequent token cannot be used as an infix
+        // operator. In this case we assume the subsequent token is part of an
+        // argument to this method call.
+        parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+
+        // If the last token consumed was a comma, then the argument list ended
+        // with a trailing comma. Report it as a missing argument unless the
+        // next token is a semicolon.
+        if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
+        }
+
+        pm_accepts_block_stack_pop(parser);
+    }
+
+    // Now that the arguments are done, check for a trailing block: either one
+    // opened by `{`, or one opened by `do` when the accepts-block stack allows
+    // it. If found, parse it and attach it to the arguments.
+    if (accepts_block) {
+        pm_block_node_t *block = NULL;
+
+        if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
+            found |= true;
+            block = parse_block(parser, (uint16_t) (depth + 1));
+            pm_arguments_validate_block(parser, arguments, block);
+        } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
+            found |= true;
+            block = parse_block(parser, (uint16_t) (depth + 1));
+        }
+
+        if (block != NULL) {
+            if (arguments->block == NULL && !arguments->has_forwarding) {
+                arguments->block = UP(block);
+            } else {
+                pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
+
+                if (arguments->block != NULL) { // keep the earlier block argument by moving it into the regular arguments
+                    if (arguments->arguments == NULL) {
+                        arguments->arguments = pm_arguments_node_create(parser);
+                    }
+                    pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
+                }
+                arguments->block = UP(block);
+            }
+        }
+    }
+
+    return found;
+}
+
+/**
+ * Check that the return is allowed in the current context by walking up the
+ * stack of contexts. If it isn't, add PM_ERR_RETURN_INVALID to the parser.
+ */
+static void
+parse_return(pm_parser_t *parser, pm_node_t *node) {
+    bool in_sclass = false; // becomes true once a PM_CONTEXT_SCLASS* context is passed on the way up
+    for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
+        switch (context_node->context) {
+            case PM_CONTEXT_BEGIN_ELSE:
+            case PM_CONTEXT_BEGIN_ENSURE:
+            case PM_CONTEXT_BEGIN_RESCUE:
+            case PM_CONTEXT_BEGIN:
+            case PM_CONTEXT_CASE_IN:
+            case PM_CONTEXT_CASE_WHEN:
+            case PM_CONTEXT_DEFAULT_PARAMS:
+            case PM_CONTEXT_DEFINED:
+            case PM_CONTEXT_ELSE:
+            case PM_CONTEXT_ELSIF:
+            case PM_CONTEXT_EMBEXPR:
+            case PM_CONTEXT_FOR_INDEX:
+            case PM_CONTEXT_FOR:
+            case PM_CONTEXT_IF:
+            case PM_CONTEXT_LOOP_PREDICATE:
+            case PM_CONTEXT_MAIN:
+            case PM_CONTEXT_MULTI_TARGET:
+            case PM_CONTEXT_PARENS:
+            case PM_CONTEXT_POSTEXE:
+            case PM_CONTEXT_PREDICATE:
+            case PM_CONTEXT_PREEXE:
+            case PM_CONTEXT_RESCUE_MODIFIER:
+            case PM_CONTEXT_TERNARY:
+            case PM_CONTEXT_UNLESS:
+            case PM_CONTEXT_UNTIL:
+            case PM_CONTEXT_WHILE:
+                // Keep iterating up the list of contexts, because returns can
+                // see through these.
+                continue;
+            case PM_CONTEXT_SCLASS_ELSE:
+            case PM_CONTEXT_SCLASS_ENSURE:
+            case PM_CONTEXT_SCLASS_RESCUE:
+            case PM_CONTEXT_SCLASS:
+                in_sclass = true;
+                continue;
+            case PM_CONTEXT_CLASS_ELSE:
+            case PM_CONTEXT_CLASS_ENSURE:
+            case PM_CONTEXT_CLASS_RESCUE:
+            case PM_CONTEXT_CLASS:
+            case PM_CONTEXT_MODULE_ELSE:
+            case PM_CONTEXT_MODULE_ENSURE:
+            case PM_CONTEXT_MODULE_RESCUE:
+            case PM_CONTEXT_MODULE:
+                // These contexts are invalid for a return.
+                pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
+                return;
+            case PM_CONTEXT_BLOCK_BRACES:
+            case PM_CONTEXT_BLOCK_ELSE:
+            case PM_CONTEXT_BLOCK_ENSURE:
+            case PM_CONTEXT_BLOCK_KEYWORDS:
+            case PM_CONTEXT_BLOCK_RESCUE:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
+            case PM_CONTEXT_DEF_ELSE:
+            case PM_CONTEXT_DEF_ENSURE:
+            case PM_CONTEXT_DEF_PARAMS:
+            case PM_CONTEXT_DEF_RESCUE:
+            case PM_CONTEXT_DEF:
+            case PM_CONTEXT_LAMBDA_BRACES:
+            case PM_CONTEXT_LAMBDA_DO_END:
+            case PM_CONTEXT_LAMBDA_ELSE:
+            case PM_CONTEXT_LAMBDA_ENSURE:
+            case PM_CONTEXT_LAMBDA_RESCUE:
+                // These contexts are valid for a return, and we should not
+                // continue to loop.
+                return;
+            case PM_CONTEXT_NONE:
+                // This case should never happen.
+                assert(false && "unreachable");
+                break;
+        }
+    }
+    if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) { // reached only when no context above returned early
+        pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
+    }
+}
+
+/**
+ * Check that the block exit (next, break, redo) is allowed in the current
+ * context. If it isn't, defer it onto parser->current_block_exits for later.
+ */
+static void
+parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
+    for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
+        switch (context_node->context) {
+            case PM_CONTEXT_BLOCK_BRACES:
+            case PM_CONTEXT_BLOCK_KEYWORDS:
+            case PM_CONTEXT_BLOCK_ELSE:
+            case PM_CONTEXT_BLOCK_ENSURE:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
+            case PM_CONTEXT_BLOCK_RESCUE:
+            case PM_CONTEXT_DEFINED:
+            case PM_CONTEXT_FOR:
+            case PM_CONTEXT_LAMBDA_BRACES:
+            case PM_CONTEXT_LAMBDA_DO_END:
+            case PM_CONTEXT_LAMBDA_ELSE:
+            case PM_CONTEXT_LAMBDA_ENSURE:
+            case PM_CONTEXT_LAMBDA_RESCUE:
+            case PM_CONTEXT_LOOP_PREDICATE:
+            case PM_CONTEXT_POSTEXE:
+            case PM_CONTEXT_UNTIL:
+            case PM_CONTEXT_WHILE:
+                // These are the good cases. We're allowed to have a block exit
+                // in these contexts.
+                return;
+            case PM_CONTEXT_DEF:
+            case PM_CONTEXT_DEF_PARAMS:
+            case PM_CONTEXT_DEF_ELSE:
+            case PM_CONTEXT_DEF_ENSURE:
+            case PM_CONTEXT_DEF_RESCUE:
+            case PM_CONTEXT_MAIN:
+            case PM_CONTEXT_PREEXE:
+            case PM_CONTEXT_SCLASS:
+            case PM_CONTEXT_SCLASS_ELSE:
+            case PM_CONTEXT_SCLASS_ENSURE:
+            case PM_CONTEXT_SCLASS_RESCUE:
+                // These are the bad cases. We're not allowed to have a block
+                // exit in these contexts.
+                //
+                // If we get here, then we're about to mark this block exit
+                // as invalid. However, it could later _become_ valid if we
+                // find a trailing while/until on the expression. In this
+                // case instead of adding the error here, we'll add the
+                // block exit to the list of exits for the expression, and
+                // the node parsing will handle validating it instead.
+                assert(parser->current_block_exits != NULL);
+                pm_node_list_append(parser->current_block_exits, node);
+                return;
+            case PM_CONTEXT_BEGIN_ELSE:
+            case PM_CONTEXT_BEGIN_ENSURE:
+            case PM_CONTEXT_BEGIN_RESCUE:
+            case PM_CONTEXT_BEGIN:
+            case PM_CONTEXT_CASE_IN:
+            case PM_CONTEXT_CASE_WHEN:
+            case PM_CONTEXT_CLASS_ELSE:
+            case PM_CONTEXT_CLASS_ENSURE:
+            case PM_CONTEXT_CLASS_RESCUE:
+            case PM_CONTEXT_CLASS:
+            case PM_CONTEXT_DEFAULT_PARAMS:
+            case PM_CONTEXT_ELSE:
+            case PM_CONTEXT_ELSIF:
+            case PM_CONTEXT_EMBEXPR:
+            case PM_CONTEXT_FOR_INDEX:
+            case PM_CONTEXT_IF:
+            case PM_CONTEXT_MODULE_ELSE:
+            case PM_CONTEXT_MODULE_ENSURE:
+            case PM_CONTEXT_MODULE_RESCUE:
+            case PM_CONTEXT_MODULE:
+            case PM_CONTEXT_MULTI_TARGET:
+            case PM_CONTEXT_PARENS:
+            case PM_CONTEXT_PREDICATE:
+            case PM_CONTEXT_RESCUE_MODIFIER:
+            case PM_CONTEXT_TERNARY:
+            case PM_CONTEXT_UNLESS:
+                // These contexts do not decide anything by themselves, so we
+                // continue walking up the list of contexts.
+                break;
+            case PM_CONTEXT_NONE:
+                // This case should never happen.
+                assert(false && "unreachable");
+                break;
+        }
+    }
+}
+
+/**
+ * Install a fresh list to collect block exits for an expression that could
+ * contain them, and hand back the list it replaces so it can be restored.
+ */
+static pm_node_list_t *
+push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
+    pm_node_list_t *stashed = parser->current_block_exits;
+    parser->current_block_exits = current_block_exits;
+    return stashed;
+}
+
+/**
+ * If we did not match a trailing while/until and this was the last chance to do
+ * so, then every block exit in the current list is invalid: add an error for
+ * each of them, then restore previous_block_exits as the current list.
+ */
+static void
+flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
+    pm_node_t *block_exit;
+    PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
+        const char *type; // keyword name passed to the diagnostic below
+
+        switch (PM_NODE_TYPE(block_exit)) {
+            case PM_BREAK_NODE: type = "break"; break;
+            case PM_NEXT_NODE: type = "next"; break;
+            case PM_REDO_NODE: type = "redo"; break;
+            default: assert(false && "unreachable"); type = ""; break;
+        }
+
+        PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
+    }
+
+    parser->current_block_exits = previous_block_exits;
+}
+
+/**
+ * Pop the current level of block exits from the parser: accept them, hand
+ * them to the enclosing level, or add errors for them.
+ */
+static void
+pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
+    // A trailing while/until modifier makes every block exit collected at
+    // this level valid, so the list can simply be dropped.
+    bool trailing_loop = match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER);
+
+    if (!trailing_loop && previous_block_exits == NULL) {
+        // There is no modifier and no enclosing level left that could still
+        // see one, so every block exit in the list is invalid. Flushing adds
+        // an error for each of them and restores the previous level.
+        flush_block_exits(parser, previous_block_exits);
+        return;
+    }
+
+    if (!trailing_loop) {
+        // The exits are invalid here but could still become valid in a
+        // higher level context, so push them up to the previous list.
+        pm_node_list_concat(previous_block_exits, parser->current_block_exits);
+    }
+
+    parser->current_block_exits = previous_block_exits;
+}
+
+/**
+ * Parse the predicate of a conditional plus its terminator: a newline or
+ * semicolon, a `then`, or both. A consumed `then` is stored in *then_keyword.
+ */
+static inline pm_node_t *
+parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
+    context_push(parser, PM_CONTEXT_PREDICATE);
+    // NOTE(review): every context other than PM_CONTEXT_IF, including
+    // PM_CONTEXT_ELSIF, reports the `unless` diagnostic here.
+    pm_diagnostic_id_t error_id = PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
+    if (context == PM_CONTEXT_IF) error_id = PM_ERR_CONDITIONAL_IF_PREDICATE;
+    pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
+
+    bool saw_term = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+    bool saw_then = accept1(parser, PM_TOKEN_KEYWORD_THEN);
+    if (saw_then) *then_keyword = parser->previous;
+    if (!saw_term && !saw_then) pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
+
+    context_pop(parser);
+    return predicate;
+}
+
+/**
+ * Parse an if or unless statement. On entry parser->previous is the if/unless
+ * keyword. This parses the predicate, the body, any elsif clauses (for if
+ * only), an optional else clause, and the closing end, and returns the
+ * outermost node. The context must be PM_CONTEXT_IF or PM_CONTEXT_UNLESS.
+ */
+static inline pm_node_t *
+parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    pm_token_t keyword = parser->previous;
+    pm_token_t then_keyword = not_provided(parser);
+
+    pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
+    pm_statements_node_t *statements = NULL;
+
+    if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+        pm_accepts_block_stack_push(parser, true);
+        statements = parse_statements(parser, context, (uint16_t) (depth + 1));
+        pm_accepts_block_stack_pop(parser);
+        accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+    }
+
+    pm_token_t end_keyword = not_provided(parser);
+    pm_node_t *parent = NULL;
+
+    switch (context) {
+        case PM_CONTEXT_IF:
+            parent = UP(pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword));
+            break;
+        case PM_CONTEXT_UNLESS:
+            parent = UP(pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements));
+            break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+
+    pm_node_t *current = parent;
+
+    // Parse any number of elsif clauses. This will form a linked list of if
+    // nodes pointing to each other from the top.
+    if (context == PM_CONTEXT_IF) {
+        while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
+            if (parser_end_of_line_p(parser)) {
+                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+            }
+
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+            pm_token_t elsif_keyword = parser->current;
+            parser_lex(parser);
+
+            // Each elsif gets its own then token: parse_predicate only writes
+            // it when a `then` is consumed, so reusing the previous clause's
+            // token would attach a stale `then` to an elsif without one.
+            pm_token_t elsif_then_keyword = not_provided(parser);
+            pm_node_t *elsif_predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &elsif_then_keyword, (uint16_t) (depth + 1));
+
+            pm_accepts_block_stack_push(parser, true);
+            pm_statements_node_t *elsif_statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
+            pm_accepts_block_stack_pop(parser);
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+
+            pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, elsif_predicate, &elsif_then_keyword, elsif_statements, NULL, &end_keyword));
+            ((pm_if_node_t *) current)->subsequent = elsif;
+            current = elsif;
+        }
+    }
+
+    if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+        opening_newline_index = token_newline_index(parser);
+
+        parser_lex(parser);
+        pm_token_t else_keyword = parser->previous;
+
+        pm_accepts_block_stack_push(parser, true);
+        pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
+        pm_accepts_block_stack_pop(parser);
+
+        accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
+
+        pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
+
+        switch (context) {
+            case PM_CONTEXT_IF:
+                ((pm_if_node_t *) current)->subsequent = UP(else_node);
+                break;
+            case PM_CONTEXT_UNLESS:
+                ((pm_unless_node_t *) parent)->else_clause = else_node;
+                break;
+            default:
+                assert(false && "unreachable");
+                break;
+        }
+    } else {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
+    }
+
+    // Set the appropriate end location for all of the nodes in the subtree.
+    switch (context) {
+        case PM_CONTEXT_IF: {
+            // Walk the chain of if/elsif nodes, stamping each with the end
+            // keyword, then stamp the trailing else node if there is one.
+            pm_node_t *node = parent;
+            while (node != NULL && PM_NODE_TYPE(node) == PM_IF_NODE) {
+                pm_if_node_end_keyword_loc_set((pm_if_node_t *) node, &parser->previous);
+                node = ((pm_if_node_t *) node)->subsequent;
+            }
+            if (node != NULL && PM_NODE_TYPE(node) == PM_ELSE_NODE) {
+                pm_else_node_end_keyword_loc_set((pm_else_node_t *) node, &parser->previous);
+            }
+            break;
+        }
+        case PM_CONTEXT_UNLESS:
+            pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
+            break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+    pm_node_list_free(&current_block_exits);
+
+    return parent;
+}
+
+/**
+ * Expands to case labels for all of the keyword tokens. The leading `case` and
+ * trailing `:` are left off, so use it as `case PM_CASE_KEYWORD:` in a switch.
+ */
+#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
+    case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
+    case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
+    case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
+    case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
+    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
+    case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
+    case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
+    case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
+    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
+    case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
+
+/**
+ * Expands to case labels for the operator tokens listed below. The leading
+ * `case` and trailing `:` are left off: use it as `case PM_CASE_OPERATOR:`.
+ */
+#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
+    case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
+    case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
+    case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
+    case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
+    case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
+    case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
+    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
+
+/**
+ * Expands to case labels (use as `case PM_CASE_PRIMITIVE:`) for all of the
+ * token types that represent the beginning of nodes that are "primitives" in
+ * a pattern matching expression.
+ */
+#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
+    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
+    case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
+    case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
+    case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
+    case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
+    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
+    case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
+    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
+
+/**
+ * Expands to case labels (use as `case PM_CASE_PARAMETER:`) for all of the
+ * token types that could begin a parameter.
+ */
+#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
+    case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
+    case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
+    case PM_TOKEN_CLASS_VARIABLE
+
+/**
+ * Expands to case labels (use as `case PM_CASE_WRITABLE:`) for all of the node
+ * types (not token types) that can be transformed into write targets.
+ */
+#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
+    case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
+    case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
+    case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
+
+// Assert that the forced UTF-8 string flag and encoding flag share a value, so
+// that we can safely switch the type of a node without having to move the flag.
+PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
+
+/**
+ * Compute the encoding flag that string content should carry when the lexer
+ * recorded an explicit encoding. Returns 0 when no flag needs to be forced.
+ */
+static inline pm_node_flags_t
+parse_unescaped_encoding(const pm_parser_t *parser) {
+    // Without an explicit encoding there is nothing to force.
+    if (parser->explicit_encoding == NULL) return 0;
+
+    // An explicit UTF-8 encoding marks the string as UTF-8.
+    if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+        return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
+    }
+
+    // Any other explicit encoding keeps the source encoding, unless the source
+    // is US-ASCII: then the string is forced to ASCII-8BIT to keep it valid.
+    if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+        return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
+    }
+    return 0;
+}
+
+/**
+ * Parse a node that is part of a string. If the current token cannot begin a
+ * string part, then it is consumed, an error is added, and NULL is returned.
+ */
+static pm_node_t *
+parse_string_part(pm_parser_t *parser, uint16_t depth) {
+    switch (parser->current.type) {
+        // Here the lexer has returned to us plain string content. In this case
+        // we'll create a string node that has no opening or closing and return that
+        // as the part. These kinds of parts look like:
+        //
+        //     "aaa #{bbb} #@ccc ddd"
+        //      ^^^^      ^     ^^^^
+        case PM_TOKEN_STRING_CONTENT: {
+            pm_token_t opening = not_provided(parser);
+            pm_token_t closing = not_provided(parser);
+
+            pm_node_t *node = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+            pm_node_flag_set(node, parse_unescaped_encoding(parser));
+
+            parser_lex(parser);
+            return node;
+        }
+        // Here the lexer has returned the beginning of an embedded expression. In
+        // that case we'll parse the inner statements and return that as the part.
+        // These kinds of parts look like:
+        //
+        //     "aaa #{bbb} #@ccc ddd"
+        //          ^^^^^^
+        case PM_TOKEN_EMBEXPR_BEGIN: {
+            // Ruby disallows seeing encoding around interpolation in strings,
+            // even though it is known at parse time.
+            parser->explicit_encoding = NULL;
+
+            pm_lex_state_t state = parser->lex_state;
+            int brace_nesting = parser->brace_nesting;
+
+            parser->brace_nesting = 0;
+            lex_state_set(parser, PM_LEX_STATE_BEG);
+            parser_lex(parser);
+
+            pm_token_t opening = parser->previous;
+            pm_statements_node_t *statements = NULL;
+
+            if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
+                pm_accepts_block_stack_push(parser, true);
+                statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
+                pm_accepts_block_stack_pop(parser);
+            }
+
+            parser->brace_nesting = brace_nesting; // restore the values saved before the embedded statements
+            lex_state_set(parser, state);
+
+            expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
+            pm_token_t closing = parser->previous;
+
+            // If this set of embedded statements only contains a single
+            // statement, then Ruby does not consider it as a possible statement
+            // that could emit a line event.
+            if (statements != NULL && statements->body.size == 1) {
+                pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
+            }
+
+            return UP(pm_embedded_statements_node_create(parser, &opening, statements, &closing));
+        }
+
+        // Here the lexer has returned the beginning of an embedded variable.
+        // In that case we'll parse the variable and create an appropriate node
+        // for it and then return that node. These kinds of parts look like:
+        //
+        //     "aaa #{bbb} #@ccc ddd"
+        //                 ^^^^^
+        case PM_TOKEN_EMBVAR: {
+            // Ruby disallows seeing encoding around interpolation in strings,
+            // even though it is known at parse time.
+            parser->explicit_encoding = NULL;
+
+            lex_state_set(parser, PM_LEX_STATE_BEG);
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *variable;
+
+            switch (parser->current.type) {
+                // In this case a back reference is being interpolated. We'll
+                // create a back reference read node.
+                case PM_TOKEN_BACK_REFERENCE:
+                    parser_lex(parser);
+                    variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
+                    break;
+                // In this case an nth reference is being interpolated. We'll
+                // create a numbered reference read node.
+                case PM_TOKEN_NUMBERED_REFERENCE:
+                    parser_lex(parser);
+                    variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
+                    break;
+                // In this case a global variable is being interpolated. We'll
+                // create a global variable read node.
+                case PM_TOKEN_GLOBAL_VARIABLE:
+                    parser_lex(parser);
+                    variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
+                    break;
+                // In this case an instance variable is being interpolated.
+                // We'll create an instance variable read node.
+                case PM_TOKEN_INSTANCE_VARIABLE:
+                    parser_lex(parser);
+                    variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
+                    break;
+                // In this case a class variable is being interpolated. We'll
+                // create a class variable read node.
+                case PM_TOKEN_CLASS_VARIABLE:
+                    parser_lex(parser);
+                    variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
+                    break;
+                // We can hit here if we got an invalid token. In that case
+                // we'll not attempt to lex this token and instead just return a
+                // missing node.
+                default:
+                    expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
+                    variable = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+                    break;
+            }
+
+            return UP(pm_embedded_variable_node_create(parser, &operator, variable));
+        }
+        default:
+            parser_lex(parser);
+            pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
+            return NULL;
+    }
+}
+
+/**
+ * Find where the name of an operator symbol ends. Unary operators that cannot
+ * also be binary operators drop a trailing `@` at the parser level, so that
+ * `~@` is parsed as `~` and `!@` is parsed as `!`. Every other token keeps its
+ * full extent.
+ */
+static const uint8_t *
+parse_operator_symbol_name(const pm_token_t *name) {
+    const uint8_t *name_end = name->end;
+    bool unary_only = name->type == PM_TOKEN_TILDE || name->type == PM_TOKEN_BANG;
+
+    if (unary_only && name_end[-1] == '@') {
+        return name_end - 1;
+    }
+    return name_end;
+}
+
+/** Build a symbol node named by the operator token in parser->current, then lex past it. */
+static pm_node_t *
+parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
+    pm_token_t no_closing = not_provided(parser);
+    pm_symbol_node_t *node = pm_symbol_node_create(parser, opening, &parser->current, &no_closing);
+    const uint8_t *name_end = parse_operator_symbol_name(&parser->current);
+
+    if (next_state != PM_LEX_STATE_NONE) {
+        lex_state_set(parser, next_state);
+    }
+    parser_lex(parser);
+    pm_string_shared_init(&node->unescaped, parser->previous.start, name_end);
+    pm_node_flag_set(UP(node), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
+    return UP(node);
+}
+
+/**
+ * Parse a symbol node. This function will get called immediately after finding
+ * a symbol opening token. This handles parsing bare symbols and interpolated
+ * symbols. A next_state other than PM_LEX_STATE_NONE is set on the parser before the end of the symbol is lexed.
+ */
+static pm_node_t *
+parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
+    const pm_token_t opening = parser->previous;
+
+    if (lex_mode->mode != PM_LEX_STRING) { // bare symbol: the lex mode handed in by the caller is not a string mode
+        if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
+
+        switch (parser->current.type) {
+            case PM_CASE_OPERATOR:
+                return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state); // falls back to PM_LEX_STATE_ENDFN when no state was requested
+            case PM_TOKEN_IDENTIFIER:
+            case PM_TOKEN_CONSTANT:
+            case PM_TOKEN_INSTANCE_VARIABLE:
+            case PM_TOKEN_METHOD_NAME:
+            case PM_TOKEN_CLASS_VARIABLE:
+            case PM_TOKEN_GLOBAL_VARIABLE:
+            case PM_TOKEN_NUMBERED_REFERENCE:
+            case PM_TOKEN_BACK_REFERENCE:
+            case PM_CASE_KEYWORD:
+                parser_lex(parser);
+                break;
+            default:
+                expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID); // no usable name token here; PM_ERR_SYMBOL_INVALID is the diagnostic supplied
+                break;
+        }
+
+        pm_token_t closing = not_provided(parser);
+        pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
+
+        pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
+        pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+
+        return UP(symbol);
+    }
+
+    if (lex_mode->as.string.interpolation) {
+        // If we have the end of the symbol, then we can return an empty symbol.
+        if (match1(parser, PM_TOKEN_STRING_END)) {
+            if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
+            parser_lex(parser);
+
+            pm_token_t content = not_provided(parser);
+            pm_token_t closing = parser->previous;
+            return UP(pm_symbol_node_create(parser, &opening, &content, &closing));
+        }
+
+        // Now we can parse the first part of the symbol.
+        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+
+        // If we got a string part, then it's possible that we could transform
+        // what looks like an interpolated symbol into a regular symbol.
+        if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+            if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
+            expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
+
+            return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
+        }
+
+        pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening); // closing is a placeholder here; it is set once the terminator is reached
+        if (part) pm_interpolated_symbol_node_append(symbol, part);
+
+        while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+            if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { // NULL parts are simply skipped
+                pm_interpolated_symbol_node_append(symbol, part);
+            }
+        }
+
+        if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
+        if (match1(parser, PM_TOKEN_EOF)) {
+            pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED); // unterminated: the error is attached to the opening token
+        } else {
+            expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
+        }
+
+        pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
+        return UP(symbol);
+    }
+
+    pm_token_t content; // from here on: string lex mode without interpolation
+    pm_string_t unescaped;
+
+    if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+        content = parser->current;
+        unescaped = parser->current_string;
+        parser_lex(parser);
+
+        // If we have two string contents in a row, then the content of this
+        // symbol is split because of heredoc contents. This looks like:
+        //
+        // <<A; :'a
+        // A
+        // b'
+        //
+        // In this case, the best way we have to represent this is as an
+        // interpolated symbol node, so that's what we'll do here.
+        if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+            pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
+            pm_token_t bounds = not_provided(parser);
+
+            pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped));
+            pm_interpolated_symbol_node_append(symbol, part);
+
+            part = UP(pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string));
+            pm_interpolated_symbol_node_append(symbol, part);
+
+            if (next_state != PM_LEX_STATE_NONE) {
+                lex_state_set(parser, next_state);
+            }
+
+            parser_lex(parser);
+            expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
+
+            pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
+            return UP(symbol);
+        }
+    } else {
+        content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end }; // no content token: synthesize an empty one right after the previous token
+        pm_string_shared_init(&unescaped, content.start, content.end);
+    }
+
+    if (next_state != PM_LEX_STATE_NONE) {
+        lex_state_set(parser, next_state);
+    }
+
+    if (match1(parser, PM_TOKEN_EOF)) {
+        pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC); // unterminated: the error is attached to the opening token
+    } else {
+        expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
+    }
+
+    return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
+}
+
+/**
+ * Parse an argument to undef which can either be a bare word, a symbol, a
+ * constant, or an interpolated symbol. Any other token records PM_ERR_UNDEF_ARGUMENT and yields a missing node.
+ */
+static inline pm_node_t *
+parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
+    switch (parser->current.type) {
+        case PM_CASE_OPERATOR: {
+            const pm_token_t opening = not_provided(parser); // bare operator: there is no `:` opening token
+            return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
+        }
+        case PM_CASE_KEYWORD:
+        case PM_TOKEN_CONSTANT:
+        case PM_TOKEN_IDENTIFIER:
+        case PM_TOKEN_METHOD_NAME: {
+            parser_lex(parser);
+
+            pm_token_t opening = not_provided(parser); // bare word: a symbol node with neither opening nor closing
+            pm_token_t closing = not_provided(parser);
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
+
+            pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
+            pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+
+            return UP(symbol);
+        }
+        case PM_TOKEN_SYMBOL_BEGIN: {
+            pm_lex_mode_t lex_mode = *parser->lex_modes.current; // copy the lex mode before lexing past the symbol opening
+            parser_lex(parser);
+
+            return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
+        }
+        default:
+            pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
+            return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); // the offending token is not consumed here
+    }
+}
+
+/**
+ * Parse an argument to alias which can either be a bare word, a symbol, an
+ * interpolated symbol, a global variable, a back reference or a numbered reference. If this is the first argument, then
+ * we need to set the lex state to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM
+ * between the first and second arguments. Any other token records PM_ERR_ALIAS_ARGUMENT and yields a missing node.
+ */
+static inline pm_node_t *
+parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
+    switch (parser->current.type) {
+        case PM_CASE_OPERATOR: {
+            const pm_token_t opening = not_provided(parser); // bare operator: there is no `:` opening token
+            return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
+        }
+        case PM_CASE_KEYWORD:
+        case PM_TOKEN_CONSTANT:
+        case PM_TOKEN_IDENTIFIER:
+        case PM_TOKEN_METHOD_NAME: {
+            if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM); // set before lexing, so it is in effect for the token that follows
+            parser_lex(parser);
+
+            pm_token_t opening = not_provided(parser);
+            pm_token_t closing = not_provided(parser);
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
+
+            pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
+            pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+
+            return UP(symbol);
+        }
+        case PM_TOKEN_SYMBOL_BEGIN: {
+            pm_lex_mode_t lex_mode = *parser->lex_modes.current; // copy the lex mode before lexing past the symbol opening
+            parser_lex(parser);
+
+            return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
+        }
+        case PM_TOKEN_BACK_REFERENCE: // the three variable forms below become read nodes, not symbols
+            parser_lex(parser);
+            return UP(pm_back_reference_read_node_create(parser, &parser->previous));
+        case PM_TOKEN_NUMBERED_REFERENCE:
+            parser_lex(parser);
+            return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
+        case PM_TOKEN_GLOBAL_VARIABLE:
+            parser_lex(parser);
+            return UP(pm_global_variable_read_node_create(parser, &parser->previous));
+        default:
+            pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
+            return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end)); // the offending token is not consumed here
+    }
+}
+
+/**
+ * Parse the previous identifier token into a local variable read, or into an
+ * implicit parameter read (numbered parameter or `it`). If none applies, it returns NULL instead.
+ */
+static pm_node_t *
+parse_variable(pm_parser_t *parser) {
+    pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
+    int depth;
+    bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
+
+    if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) { // -1 is the "not found" depth
+        return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
+    }
+
+    pm_scope_t *current_scope = parser->current_scope;
+    if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) { // implicit parameters are only considered in scopes that are not closed and do not disallow them
+        if (is_numbered_param) {
+            // When you use a numbered parameter, it implies the existence of
+            // all of the locals that exist before it. For example, referencing
+            // _2 means that _1 must exist. Therefore here we loop through all
+            // of the possibilities and add them into the constant pool.
+            uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0'); // the digit following the leading character of the token
+            for (uint8_t number = 1; number <= maximum; number++) {
+                pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
+            }
+
+            if (!match1(parser, PM_TOKEN_EQUAL)) { // the FOUND flag is not set when `=` follows the name
+                parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
+            }
+
+            pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
+            pm_node_list_append(&current_scope->implicit_parameters, node); // the scope keeps track of every implicit parameter read
+
+            return node;
+        } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) { // `it` is only treated as a parameter from the 3.4 version option onwards
+            pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
+            pm_node_list_append(&current_scope->implicit_parameters, node);
+
+            return node;
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * Parse an identifier into either a local variable read or a call. The identifier is taken from parser->previous.
+ */
+static pm_node_t *
+parse_variable_call(pm_parser_t *parser) {
+    pm_node_flags_t flags = 0;
+
+    if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) { // only when no `(` follows and the name does not end in `!` or `?`
+        pm_node_t *node = parse_variable(parser);
+        if (node != NULL) return node;
+        flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL; // not a known variable: the call is flagged as a variable call
+    }
+
+    pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
+    pm_node_flag_set(UP(node), flags);
+
+    return UP(node);
+}
+
+/**
+ * Parse the method definition name based on the current token available on the
+ * parser. If it does not match a valid method definition name, then a missing
+ * token is returned (PM_ERR_DEF_NAME is recorded and the current token is not consumed).
+ */
+static inline pm_token_t
+parse_method_definition_name(pm_parser_t *parser) {
+    switch (parser->current.type) {
+        case PM_CASE_KEYWORD:
+        case PM_TOKEN_CONSTANT:
+        case PM_TOKEN_METHOD_NAME:
+            parser_lex(parser);
+            return parser->previous;
+        case PM_TOKEN_IDENTIFIER:
+            pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end); // identifiers are additionally checked against numbered parameter names
+            parser_lex(parser);
+            return parser->previous;
+        case PM_CASE_OPERATOR:
+            lex_state_set(parser, PM_LEX_STATE_ENDFN); // operators switch the lex state before the next token is lexed
+            parser_lex(parser);
+            return parser->previous;
+        default:
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
+            return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end }; // spans the offending token
+    }
+}
+
+static void
+parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) { // Strip up to common_whitespace columns of leading inline whitespace from string, in place.
+    // Make sure the string owns its source buffer (pm_string_ensure_owned),
+    // presumably so that the memmove below can safely rewrite it in place.
+    pm_string_ensure_owned(string);
+
+    // Now get the bounds of the existing string. We'll use this as a
+    // destination to move bytes into. We'll also use it for bounds checking
+    // since we don't require that these strings be null terminated.
+    size_t dest_length = pm_string_length(string);
+    const uint8_t *source_cursor = (uint8_t *) string->source;
+    const uint8_t *source_end = source_cursor + dest_length;
+
+    // We're going to move bytes backward in the string when we get leading
+    // whitespace, so we'll keep a running count of how many columns of
+    // whitespace have been trimmed so far (a tab advances to the next tab stop).
+    size_t trimmed_whitespace = 0;
+
+    // While we haven't reached the amount of common whitespace that we need to
+    // trim and we haven't reached the end of the string, we'll keep trimming
+    // whitespace. Trimming in this context means skipping over these bytes such
+    // that they aren't copied into the new string.
+    while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
+        if (*source_cursor == '\t') {
+            trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE; // round up to the next multiple of the tab size
+            if (trimmed_whitespace > common_whitespace) break; // this tab would overshoot the common indent, so it is kept
+        } else {
+            trimmed_whitespace++;
+        }
+
+        source_cursor++;
+        dest_length--; // one fewer byte remains for every byte skipped
+    }
+
+    memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor)); // slide the remaining bytes to the front of the buffer
+    string->length = dest_length;
+}
+
+/**
+ * Take a heredoc node that is indented by a ~ and trim the leading whitespace. String nodes left empty are destroyed and removed from nodes.
+ */
+static void
+parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
+    // The next node should be dedented if it's the first node in the list or if
+    // it follows a string node.
+    bool dedent_next = true;
+
+    // Iterate over all nodes, and trim whitespace accordingly. We're going to
+    // keep around two indices: a read and a write. If we end up trimming all of
+    // the whitespace from a node, then we'll drop it from the list entirely.
+    size_t write_index = 0;
+
+    pm_node_t *node;
+    PM_NODE_LIST_FOREACH(nodes, read_index, node) {
+        // We're not manipulating child nodes that aren't strings. In this case
+        // we'll skip past it and indicate that the subsequent node should not
+        // be dedented.
+        if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
+            nodes->nodes[write_index++] = node; // kept as-is, compacted toward the front of the list
+            dedent_next = false;
+            continue;
+        }
+
+        pm_string_node_t *string_node = ((pm_string_node_t *) node);
+        if (dedent_next) {
+            parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
+        }
+
+        if (string_node->unescaped.length == 0) { // checked for every string node, whether or not it was just dedented
+            pm_node_destroy(parser, node);
+        } else {
+            nodes->nodes[write_index++] = node;
+        }
+
+        // We always dedent the next node if it follows a string node.
+        dedent_next = true;
+    }
+
+    nodes->size = write_index; // shrink the list to the nodes that were kept
+}
+
+/**
+ * Return a string content token at a particular location that is empty (its start and end are both location).
+ */
+static pm_token_t
+parse_strings_empty_content(const uint8_t *location) {
+    return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
+}
+
+/**
+ * Parse a set of strings that could be concatenated together. current is the node parsed so far (or NULL); the combined node is returned.
+ */
+static inline pm_node_t *
+parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
+    assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
+    bool concating = false; // becomes true once `current` has been wrapped in a container node
+
+    while (match1(parser, PM_TOKEN_STRING_BEGIN)) { // one iteration per adjacent string literal
+        pm_node_t *node = NULL;
+
+        // Here we have found a string literal. We'll parse it and add it to
+        // the list of strings.
+        const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+        assert(lex_mode->mode == PM_LEX_STRING);
+        bool lex_interpolation = lex_mode->as.string.interpolation; // read before lexing past the opening token
+        bool label_allowed = lex_mode->as.string.label_allowed && accepts_label; // both the lexer and the caller must permit a label
+
+        pm_token_t opening = parser->current;
+        parser_lex(parser);
+
+        if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+            expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
+            // If we get here, then we have an end immediately after a
+            // start. In that case we'll create an empty content token and
+            // return an uninterpolated string.
+            pm_token_t content = parse_strings_empty_content(parser->previous.start);
+            pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
+
+            pm_string_shared_init(&string->unescaped, content.start, content.end);
+            node = UP(string);
+        } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+            // If we get here, then we have an end of a label immediately
+            // after a start. In that case we'll create an empty symbol
+            // node.
+            pm_token_t content = parse_strings_empty_content(parser->previous.start);
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+
+            pm_string_shared_init(&symbol->unescaped, content.start, content.end);
+            node = UP(symbol);
+
+            if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+        } else if (!lex_interpolation) {
+            // If we don't accept interpolation then we expect the string to
+            // start with a single string content node.
+            pm_string_t unescaped;
+            pm_token_t content;
+
+            if (match1(parser, PM_TOKEN_EOF)) {
+                unescaped = PM_STRING_EMPTY;
+                content = not_provided(parser);
+            } else {
+                unescaped = parser->current_string; // captured before expect1 advances the parser
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
+                content = parser->previous;
+            }
+
+            // It is unfortunately possible to have multiple string content
+            // nodes in a row in the case that there's heredoc content in
+            // the middle of the string, like this cursed example:
+            //
+            // <<-END+'b
+            //  a
+            // END
+            //  c'+'d'
+            //
+            // In that case we need to switch to an interpolated string to
+            // be able to contain all of the parts.
+            if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                pm_node_list_t parts = { 0 };
+
+                pm_token_t delimiters = not_provided(parser);
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped));
+                pm_node_list_append(&parts, part);
+
+                do {
+                    part = UP(pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters));
+                    pm_node_list_append(&parts, part);
+                    parser_lex(parser);
+                } while (match1(parser, PM_TOKEN_STRING_CONTENT));
+
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
+                node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
+
+                pm_node_list_free(&parts); // only the temporary list is freed here, after the node has been built from it
+            } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+                node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
+                if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+            } else if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF); // unterminated: the error is attached to the opening token
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
+            } else if (accept1(parser, PM_TOKEN_STRING_END)) {
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
+            } else {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
+                parser->previous.start = parser->previous.end; // turn `previous` into a zero-width missing token to serve as the closing
+                parser->previous.type = PM_TOKEN_MISSING;
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
+            }
+        } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+            // In this case we've hit string content so we know the string
+            // at least has something in it. We'll need to check if the
+            // following token is the end (in which case we can return a
+            // plain string) or if it's not then it has interpolation.
+            pm_token_t content = parser->current;
+            pm_string_t unescaped = parser->current_string;
+            parser_lex(parser);
+
+            if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+                node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
+                pm_node_flag_set(node, parse_unescaped_encoding(parser));
+
+                // Kind of odd behavior, but basically if we have an
+                // unterminated string and it ends in a newline, we back up one
+                // character so that the error message is on the last line of
+                // content in the string.
+                if (!accept1(parser, PM_TOKEN_STRING_END)) {
+                    const uint8_t *location = parser->previous.end;
+                    if (location > parser->start && location[-1] == '\n') location--;
+                    pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
+
+                    parser->previous.start = parser->previous.end; // turn `previous` into a zero-width missing token
+                    parser->previous.type = PM_TOKEN_MISSING;
+                }
+            } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
+                node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
+                if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+            } else {
+                // If we get here, then we have interpolation so we'll need
+                // to create a string or symbol node with interpolation.
+                pm_node_list_t parts = { 0 };
+                pm_token_t string_opening = not_provided(parser);
+                pm_token_t string_closing = not_provided(parser);
+
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped)); // parser->previous is the content token lexed above
+                pm_node_flag_set(part, parse_unescaped_encoding(parser));
+                pm_node_list_append(&parts, part);
+
+                while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
+                    if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { // NULL parts are simply skipped
+                        pm_node_list_append(&parts, part);
+                    }
+                }
+
+                if (accept1(parser, PM_TOKEN_LABEL_END)) {
+                    node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
+                    if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+                } else if (match1(parser, PM_TOKEN_EOF)) {
+                    pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+                    node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
+                    node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
+                }
+
+                pm_node_list_free(&parts);
+            }
+        } else {
+            // If we get here, then the first part of the string is not plain
+            // string content, in which case we need to parse the string as an
+            // interpolated string.
+            pm_node_list_t parts = { 0 };
+            pm_node_t *part;
+
+            while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
+                if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { // NULL parts are simply skipped
+                    pm_node_list_append(&parts, part);
+                }
+            }
+
+            if (accept1(parser, PM_TOKEN_LABEL_END)) {
+                node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
+                if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
+            } else if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
+                node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
+            } else {
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
+                node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
+            }
+
+            pm_node_list_free(&parts);
+        }
+
+        if (current == NULL) {
+            // If the node we just parsed is a symbol node, then we can't
+            // concatenate it with anything else, so we can now return that
+            // node.
+            if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
+                return node;
+            }
+
+            // If we don't already have a node, then it's fine and we can just
+            // set the result to be the node we just parsed.
+            current = node;
+        } else {
+            // Otherwise we need to check the type of the node we just parsed.
+            // If it cannot be concatenated with the previous node, then we'll
+            // need to add a syntax error.
+            if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
+                pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION); // the node is still appended below despite the error
+            }
+
+            // If we haven't already created our container for concatenation,
+            // we'll do that now.
+            if (!concating) {
+                if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                    pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
+                }
+
+                concating = true;
+                pm_token_t bounds = not_provided(parser); // the container has no opening or closing of its own
+
+                pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
+                pm_interpolated_string_node_append(container, current);
+                current = UP(container);
+            }
+
+            pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
+        }
+    }
+
+    return current;
+}
+
+#define PM_PARSE_PATTERN_SINGLE 0 // values for the `flags` argument of parse_pattern; 0 means no flag bits set
+#define PM_PARSE_PATTERN_TOP 1 // bit flag; callers combine it with PM_PARSE_PATTERN_MULTI using |
+#define PM_PARSE_PATTERN_MULTI 2 // bit flag; NOTE(review): exact meaning of TOP/MULTI is defined by parse_pattern, not visible here
+
+static pm_node_t * // forward declaration: parse_pattern is called below before its definition appears
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
+
+/**
+ * Add the newly created local to the list of captures for this pattern matching
+ * expression. If it is duplicated from a previous local, then we'll need to add
+ * an error to the parser. Names that start with an underscore are never recorded or checked.
+ */
+static void
+parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
+    // Skip this capture if it starts with an underscore.
+    if (peek_at(parser, location->start) == '_') return;
+
+    if (pm_constant_id_list_includes(captures, capture)) {
+        pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); // a duplicate is reported and not appended again
+    } else {
+        pm_constant_id_list_append(captures, capture);
+    }
+}
+
+/**
+ * Accept any number of constants joined by :: delimiters. If a [ or ( follows
+ * the constant path, the bracketed or parenthesized inner pattern is parsed as
+ * well and the returned pattern node has the constant path attached to it:
+ *
+ * - with no inner pattern (Foo[] or Foo()), an empty array pattern is created
+ * - an inner array, find, or hash pattern that has neither a constant nor an
+ *   opening location is reused in place and widened to span the constant
+ * - any other inner pattern becomes the single required element of a new
+ *   array pattern
+ *
+ * The depth parameter is passed on, incremented by one, to the nested call to
+ * parse_pattern.
+ */
+static pm_node_t *
+parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
+    // Now, if there are any :: operators that follow, parse them as constant
+    // path nodes.
+    while (accept1(parser, PM_TOKEN_COLON_COLON)) {
+        pm_token_t delimiter = parser->previous;
+        expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+        node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
+    }
+
+    // If there is a [ or ( that follows, then this is part of a larger pattern
+    // expression. We'll parse the inner pattern here, then modify the returned
+    // inner pattern with our constant path attached.
+    if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
+        return node;
+    }
+
+    pm_token_t opening;
+    pm_token_t closing;
+    pm_node_t *inner = NULL;
+
+    if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
+        opening = parser->previous;
+        accept1(parser, PM_TOKEN_NEWLINE);
+
+        if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+            inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
+            accept1(parser, PM_TOKEN_NEWLINE);
+            expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
+        }
+
+        closing = parser->previous;
+    } else {
+        parser_lex(parser); // presumably consumes the ( that match2 saw above, since [ was not accepted
+        opening = parser->previous;
+        accept1(parser, PM_TOKEN_NEWLINE);
+
+        if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+            inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
+            accept1(parser, PM_TOKEN_NEWLINE);
+            expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
+        }
+
+        closing = parser->previous;
+    }
+
+    if (!inner) {
+        // If there was no inner pattern, then we have something like Foo() or
+        // Foo[]. In that case we'll create an array pattern with no requireds.
+        return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
+    }
+
+    // Now that we have the inner pattern, check to see if it's an array, find,
+    // or hash pattern. If it is, then we'll attach our constant path to it if
+    // it doesn't already have a constant. If it's not one of those node types
+    // or it does have a constant, then we'll create an array pattern.
+    switch (PM_NODE_TYPE(inner)) {
+        case PM_ARRAY_PATTERN_NODE: {
+            pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
+
+            if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
+                pattern_node->base.location.start = node->location.start;
+                pattern_node->base.location.end = closing.end;
+
+                pattern_node->constant = node;
+                pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+
+                return UP(pattern_node);
+            }
+
+            break;
+        }
+        case PM_FIND_PATTERN_NODE: {
+            pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
+
+            if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
+                pattern_node->base.location.start = node->location.start;
+                pattern_node->base.location.end = closing.end;
+
+                pattern_node->constant = node;
+                pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+
+                return UP(pattern_node);
+            }
+
+            break;
+        }
+        case PM_HASH_PATTERN_NODE: {
+            pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
+
+            if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
+                pattern_node->base.location.start = node->location.start;
+                pattern_node->base.location.end = closing.end;
+
+                pattern_node->constant = node;
+                pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+
+                return UP(pattern_node);
+            }
+
+            break;
+        }
+        default:
+            break;
+    }
+
+    // If we got here, then we didn't return one of the inner patterns by
+    // attaching its constant. In this case we'll create an array pattern and
+    // attach our constant to it.
+    pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
+    pm_array_pattern_node_requireds_append(pattern_node, inner);
+    return UP(pattern_node);
+}
+
+/**
+ * Parse a rest pattern: the optional identifier that follows a * inside a
+ * pattern. The * itself must already have been consumed, and is used as the
+ * operator of the returned splat node.
+ */
+static pm_splat_node_t *
+parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
+    assert(parser->previous.type == PM_TOKEN_USTAR);
+    pm_token_t star = parser->previous;
+
+    // A rest pattern without a name has nothing to bind, so the splat node is
+    // created without an expression.
+    if (!accept1(parser, PM_TOKEN_IDENTIFIER)) {
+        return pm_splat_node_create(parser, &star, NULL);
+    }
+
+    // A named rest pattern turns the name into a local variable, so add it to
+    // the local table when the depth lookup reports that it is not there yet.
+    pm_token_t name_token = parser->previous;
+    pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name_token);
+
+    int scope_depth = pm_parser_local_depth_constant_id(parser, name_id);
+    if (scope_depth == -1) {
+        pm_parser_local_add(parser, name_id, name_token.start, name_token.end, 0);
+    }
+
+    // Record the capture before building the target node for it.
+    parse_pattern_capture(parser, captures, name_id, &PM_LOCATION_TOKEN_VALUE(&name_token));
+
+    uint32_t target_depth = (scope_depth == -1) ? 0 : (uint32_t) scope_depth;
+    pm_node_t *target = UP(pm_local_variable_target_node_create(
+        parser,
+        &PM_LOCATION_TOKEN_VALUE(&name_token),
+        name_id,
+        target_depth
+    ));
+
+    return pm_splat_node_create(parser, &star, target);
+}
+
+/**
+ * Parse a keyword rest node. The current token must be the ** operator. The
+ * result is a no-keywords parameter node for **nil, and otherwise an assoc
+ * splat node whose value is either NULL (bare **) or a local variable target
+ * (**name).
+ */
+static pm_node_t *
+parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
+    assert(parser->current.type == PM_TOKEN_USTAR_STAR);
+    parser_lex(parser);
+    pm_token_t double_star = parser->previous;
+
+    // **nil
+    if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
+        return UP(pm_no_keywords_parameter_node_create(parser, &double_star, &parser->previous));
+    }
+
+    // A bare ** has no value to bind.
+    if (!accept1(parser, PM_TOKEN_IDENTIFIER)) {
+        return UP(pm_assoc_splat_node_create(parser, NULL, &double_star));
+    }
+
+    // **name binds a local variable, so add it to the local table when the
+    // depth lookup reports that it is not there yet.
+    pm_token_t name_token = parser->previous;
+    pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name_token);
+
+    int scope_depth = pm_parser_local_depth_constant_id(parser, name_id);
+    if (scope_depth == -1) {
+        pm_parser_local_add(parser, name_id, name_token.start, name_token.end, 0);
+    }
+
+    // Record the capture before building the target node for it.
+    parse_pattern_capture(parser, captures, name_id, &PM_LOCATION_TOKEN_VALUE(&name_token));
+
+    uint32_t target_depth = (scope_depth == -1) ? 0 : (uint32_t) scope_depth;
+    pm_node_t *target = UP(pm_local_variable_target_node_create(
+        parser,
+        &PM_LOCATION_TOKEN_VALUE(&name_token),
+        name_id,
+        target_depth
+    ));
+
+    return UP(pm_assoc_splat_node_create(parser, target, &double_star));
+}
+
+/**
+ * Check that the slice of the source delimited by start and end forms a valid
+ * local variable name: it must be non-empty, begin with an identifier-start
+ * character that is not uppercase, and consist solely of identifier
+ * characters through to end.
+ */
+static bool
+pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    ptrdiff_t length = end - start;
+    if (length == 0) return false;
+
+    // The first character has to be able to start an identifier.
+    size_t width = char_is_identifier_start(parser, start, length);
+    if (width == 0) return false;
+
+    // The first character cannot be uppercase. Use the parser's encoding if it
+    // has been changed, and the UTF-8 check otherwise.
+    bool uppercase = parser->encoding_changed
+        ? parser->encoding->isupper_char(start, length)
+        : pm_encoding_utf_8_isupper_char(start, length);
+    if (uppercase) return false;
+
+    // Step over identifier characters until one fails to match. The slice is
+    // valid only if that walk stops exactly at the end.
+    const uint8_t *cursor = start + width;
+    for (;;) {
+        width = char_is_identifier(parser, cursor, end - cursor);
+        if (width == 0) break;
+        cursor += width;
+    }
+
+    return cursor == end;
+}
+
+/**
+ * Create an implicit node for the value of a hash pattern entry whose value
+ * was omitted. The name of the key is used as an implicit local variable
+ * target; errors are added if that name is not a valid local variable name.
+ */
+static pm_node_t *
+parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
+    const pm_location_t *name_loc = &key->value_loc;
+    pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name_loc->start, name_loc->end);
+
+    // Stays at -1 unless the name is valid and the depth lookup finds it.
+    int scope_depth = -1;
+
+    if (!pm_slice_is_valid_local(parser, name_loc->start, name_loc->end)) {
+        pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
+
+        // Names ending in ! or ? get an additional, more specific error.
+        bool nonempty = name_loc->end > name_loc->start;
+        if (nonempty && ((name_loc->end[-1] == '!') || (name_loc->end[-1] == '?'))) {
+            PM_PARSER_ERR_LOCATION_FORMAT(parser, name_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (name_loc->end - name_loc->start), (const char *) name_loc->start);
+        }
+    } else {
+        scope_depth = pm_parser_local_depth_constant_id(parser, name_id);
+    }
+
+    if (scope_depth == -1) {
+        pm_parser_local_add(parser, name_id, name_loc->start, name_loc->end, 0);
+    }
+
+    // Record the capture before building the target node for it.
+    parse_pattern_capture(parser, captures, name_id, name_loc);
+
+    uint32_t target_depth = (scope_depth == -1) ? 0 : (uint32_t) scope_depth;
+    pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(parser, name_loc, name_id, target_depth);
+
+    return UP(pm_implicit_node_create(parser, UP(target)));
+}
+
+/**
+ * Register a node in the set of keys seen so far for a hash pattern. If the
+ * set reports that the key was already present, add a duplicate key error to
+ * the parser.
+ */
+static void
+parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
+    bool duplicate = pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL;
+    if (!duplicate) return;
+
+    pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
+}
+
+/**
+ * Parse a hash pattern. The first key (or keyword rest) has already been
+ * parsed by the caller and is passed in as first_node; this function parses
+ * the value for that key, if any, followed by any further comma-separated
+ * entries. Keys are registered for duplicate detection, and the first keyword
+ * rest that is found becomes the rest of the pattern. Anything that follows a
+ * rest (a trailing comma, another rest, or another entry) is reported with
+ * PM_ERR_PATTERN_EXPRESSION_AFTER_REST.
+ *
+ * The opening and closing delimiters are not handled here; they are left to
+ * the caller.
+ *
+ * NOTE(review): the terminator check for the first key includes PM_TOKEN_EOF
+ * while the equivalent check for subsequent keys does not. Confirm that the
+ * difference is intentional.
+ */
+static pm_hash_pattern_node_t *
+parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
+    pm_node_list_t assocs = { 0 };
+    pm_static_literals_t keys = { 0 };
+    pm_node_t *rest = NULL;
+
+    switch (PM_NODE_TYPE(first_node)) {
+        case PM_ASSOC_SPLAT_NODE:
+        case PM_NO_KEYWORDS_PARAMETER_NODE:
+            rest = first_node;
+            break;
+        case PM_SYMBOL_NODE: {
+            if (pm_symbol_node_label_p(first_node)) {
+                parse_pattern_hash_key(parser, &keys, first_node);
+                pm_node_t *value;
+
+                if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
+                    // No value follows the label, so we will create an
+                    // implicit local variable target for the value.
+                    value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+                } else {
+                    // Here we have a value for the first assoc in the list, so
+                    // we will parse it now.
+                    value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
+                }
+
+                pm_token_t operator = not_provided(parser);
+                pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
+
+                pm_node_list_append(&assocs, assoc);
+                break;
+            }
+        }
+        PRISM_FALLTHROUGH // a symbol that is not a label is reported as an error below
+        default: {
+            // If we get anything else, then this is an error. For this we'll
+            // create a missing node for the value and create an assoc node for
+            // the first node in the list.
+            pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
+            pm_parser_err_node(parser, first_node, diag_id);
+
+            pm_token_t operator = not_provided(parser);
+            pm_node_t *value = UP(pm_missing_node_create(parser, first_node->location.start, first_node->location.end));
+            pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, &operator, value));
+
+            pm_node_list_append(&assocs, assoc);
+            break;
+        }
+    }
+
+    // If there are any other assocs, then we'll parse them now.
+    while (accept1(parser, PM_TOKEN_COMMA)) {
+        // Here we need to break to support trailing commas.
+        if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
+            // Trailing commas are not allowed to follow a rest pattern.
+            if (rest != NULL) {
+                pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
+            }
+
+            break;
+        }
+
+        if (match1(parser, PM_TOKEN_USTAR_STAR)) {
+            pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
+
+            if (rest == NULL) {
+                rest = assoc;
+            } else {
+                pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
+                pm_node_list_append(&assocs, assoc);
+            }
+        } else {
+            pm_node_t *key;
+
+            if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+                key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
+
+                if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
+                } else if (!pm_symbol_node_label_p(key)) {
+                    pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
+                }
+            } else {
+                expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
+                key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+            }
+
+            parse_pattern_hash_key(parser, &keys, key);
+            pm_node_t *value = NULL;
+
+            if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
+                    value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+                } else {
+                    value = UP(pm_missing_node_create(parser, key->location.end, key->location.end));
+                }
+            } else {
+                value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
+            }
+
+            pm_token_t operator = not_provided(parser);
+            pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, &operator, value));
+
+            if (rest != NULL) {
+                pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
+            }
+
+            pm_node_list_append(&assocs, assoc);
+        }
+    }
+
+    pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
+    xfree(assocs.nodes);
+
+    pm_static_literals_free(&keys);
+    return node;
+}
+
+/**
+ * Parse a pattern expression primitive. The current token decides what gets
+ * parsed:
+ *
+ * - an identifier or method name becomes a local variable target (a capture)
+ * - [ starts an array pattern (or wraps an inner array/find pattern)
+ * - { starts a hash pattern
+ * - a leading .. or ... starts a range with no left-hand side
+ * - the tokens covered by PM_CASE_PRIMITIVE are parsed as an expression, which
+ *   may then be the left-hand side of a range
+ * - ^ starts a pinned variable or a pinned parenthesized expression
+ * - a leading :: or a constant starts a constant path, which may be followed
+ *   by a bracketed or parenthesized sub-pattern
+ *
+ * For any other token, diag_id is added as an error at the current token and a
+ * missing node is returned without consuming anything. The depth parameter is
+ * the recursion depth; nested parses receive depth + 1.
+ */
+static pm_node_t *
+parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    switch (parser->current.type) {
+        case PM_TOKEN_IDENTIFIER:
+        case PM_TOKEN_METHOD_NAME: {
+            parser_lex(parser);
+            pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+
+            // This is the scope depth of the local, which is unrelated to the
+            // recursion depth parameter, so it gets its own name rather than
+            // shadowing that parameter.
+            int local_depth = pm_parser_local_depth_constant_id(parser, constant_id);
+            if (local_depth == -1) {
+                pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
+            }
+
+            parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+            return UP(pm_local_variable_target_node_create(
+                parser,
+                &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+                constant_id,
+                (uint32_t) (local_depth == -1 ? 0 : local_depth)
+            ));
+        }
+        case PM_TOKEN_BRACKET_LEFT_ARRAY: {
+            pm_token_t opening = parser->current;
+            parser_lex(parser);
+
+            if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+                // If we have an empty array pattern, then we'll just return a new
+                // array pattern node.
+                return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
+            }
+
+            // Otherwise, we'll parse the inner pattern, then deal with it depending
+            // on the type it returns.
+            pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
+
+            accept1(parser, PM_TOKEN_NEWLINE);
+            expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
+            pm_token_t closing = parser->previous;
+
+            // An inner array or find pattern that has no opening location yet
+            // is reused in place and given these brackets. Anything else is
+            // wrapped in a new array pattern below.
+            switch (PM_NODE_TYPE(inner)) {
+                case PM_ARRAY_PATTERN_NODE: {
+                    pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
+                    if (pattern_node->opening_loc.start == NULL) {
+                        pattern_node->base.location.start = opening.start;
+                        pattern_node->base.location.end = closing.end;
+
+                        pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                        pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+
+                        return UP(pattern_node);
+                    }
+
+                    break;
+                }
+                case PM_FIND_PATTERN_NODE: {
+                    pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
+                    if (pattern_node->opening_loc.start == NULL) {
+                        pattern_node->base.location.start = opening.start;
+                        pattern_node->base.location.end = closing.end;
+
+                        pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                        pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+
+                        return UP(pattern_node);
+                    }
+
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
+            pm_array_pattern_node_requireds_append(node, inner);
+            return UP(node);
+        }
+        case PM_TOKEN_BRACE_LEFT: {
+            // The newline flag is cleared while inside the braces and restored
+            // before returning.
+            bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+            parser->pattern_matching_newlines = false;
+
+            pm_hash_pattern_node_t *node;
+            pm_token_t opening = parser->current;
+            parser_lex(parser);
+
+            if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
+                // If we have an empty hash pattern, then we'll just return a new hash
+                // pattern node.
+                node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
+            } else {
+                pm_node_t *first_node;
+
+                switch (parser->current.type) {
+                    case PM_TOKEN_LABEL:
+                        parser_lex(parser);
+                        first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
+                        break;
+                    case PM_TOKEN_USTAR_STAR:
+                        first_node = parse_pattern_keyword_rest(parser, captures);
+                        break;
+                    case PM_TOKEN_STRING_BEGIN:
+                        first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
+                        break;
+                    default: {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
+                        parser_lex(parser);
+
+                        first_node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
+                        break;
+                    }
+                }
+
+                node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
+
+                accept1(parser, PM_TOKEN_NEWLINE);
+                expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
+                pm_token_t closing = parser->previous;
+
+                node->base.location.start = opening.start;
+                node->base.location.end = closing.end;
+
+                node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+            }
+
+            parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+            return UP(node);
+        }
+        case PM_TOKEN_UDOT_DOT:
+        case PM_TOKEN_UDOT_DOT_DOT: {
+            pm_token_t operator = parser->current;
+            parser_lex(parser);
+
+            // Since we have a unary range operator, we need to parse the subsequent
+            // expression as the right side of the range.
+            switch (parser->current.type) {
+                case PM_CASE_PRIMITIVE: {
+                    pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+                    return UP(pm_range_node_create(parser, NULL, &operator, right));
+                }
+                default: {
+                    pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
+                    pm_node_t *right = UP(pm_missing_node_create(parser, operator.start, operator.end));
+                    return UP(pm_range_node_create(parser, NULL, &operator, right));
+                }
+            }
+        }
+        case PM_CASE_PRIMITIVE: {
+            pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
+
+            // If we found a label, we need to immediately return to the caller.
+            if (pm_symbol_node_label_p(node)) return node;
+
+            // Call nodes (arithmetic operations) are not allowed in patterns
+            if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
+                pm_parser_err_node(parser, node, diag_id);
+                pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end);
+
+                pm_node_unreference(parser, node);
+                pm_node_destroy(parser, node);
+                return UP(missing_node);
+            }
+
+            // Now that we have a primitive, we need to check if it's part of a range.
+            if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
+                pm_token_t operator = parser->previous;
+
+                // Now that we have the operator, we need to check if this is followed
+                // by another expression. If it is, then we will create a full range
+                // node. Otherwise, we'll create an endless range.
+                switch (parser->current.type) {
+                    case PM_CASE_PRIMITIVE: {
+                        pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+                        return UP(pm_range_node_create(parser, node, &operator, right));
+                    }
+                    default:
+                        return UP(pm_range_node_create(parser, node, &operator, NULL));
+                }
+            }
+
+            return node;
+        }
+        case PM_TOKEN_CARET: {
+            parser_lex(parser);
+            pm_token_t operator = parser->previous;
+
+            // At this point we have a pin operator. We need to check the subsequent
+            // expression to determine if it's a variable or an expression.
+            switch (parser->current.type) {
+                case PM_TOKEN_IDENTIFIER: {
+                    parser_lex(parser);
+                    pm_node_t *variable = UP(parse_variable(parser));
+
+                    if (variable == NULL) {
+                        PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
+                        variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
+                    }
+
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+                case PM_TOKEN_INSTANCE_VARIABLE: {
+                    parser_lex(parser);
+                    pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
+
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+                case PM_TOKEN_CLASS_VARIABLE: {
+                    parser_lex(parser);
+                    pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
+
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+                case PM_TOKEN_GLOBAL_VARIABLE: {
+                    parser_lex(parser);
+                    pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
+
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+                case PM_TOKEN_NUMBERED_REFERENCE: {
+                    parser_lex(parser);
+                    pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
+
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+                case PM_TOKEN_BACK_REFERENCE: {
+                    parser_lex(parser);
+                    pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
+
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+                case PM_TOKEN_PARENTHESIS_LEFT: {
+                    // The newline flag is cleared only while the pinned
+                    // expression itself is parsed.
+                    bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+                    parser->pattern_matching_newlines = false;
+
+                    pm_token_t lparen = parser->current;
+                    parser_lex(parser);
+
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
+                    parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+
+                    accept1(parser, PM_TOKEN_NEWLINE);
+                    expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
+                    return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
+                }
+                default: {
+                    // If we get here, then we have a pin operator followed by something
+                    // not understood. We'll create a missing node and return that.
+                    pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
+                    pm_node_t *variable = UP(pm_missing_node_create(parser, operator.start, operator.end));
+                    return UP(pm_pinned_variable_node_create(parser, &operator, variable));
+                }
+            }
+        }
+        case PM_TOKEN_UCOLON_COLON: {
+            pm_token_t delimiter = parser->current;
+            parser_lex(parser);
+
+            expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+            pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
+
+            return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
+        }
+        case PM_TOKEN_CONSTANT: {
+            pm_token_t constant = parser->current;
+            parser_lex(parser);
+
+            pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
+            return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
+        }
+        default:
+            pm_parser_err_current(parser, diag_id);
+            return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+    }
+}
+
+/**
+ * Visitor callback used by parse_pattern_alternation_error. The data pointer
+ * is the parser. A local variable target node gets a capture-in-alternative
+ * error and yields false; every other node yields true.
+ */
+static bool
+parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
+    if (PM_NODE_TYPE(node) != PM_LOCAL_VARIABLE_TARGET_NODE) {
+        return true;
+    }
+
+    pm_parser_t *parser = (pm_parser_t *) data;
+    pm_parser_err(parser, node->location.start, node->location.end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
+    return false;
+}
+
+/**
+ * Report the captures found within one side of an alternation. By the time
+ * this is called a syntax error is already certain, because a variable has
+ * been captured and we are inside an alternation; this visits the given node
+ * with parse_pattern_alternation_error_each to attach the errors.
+ */
+static void
+parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
+    void *visitor_data = (void *) parser;
+    pm_visit_node(node, parse_pattern_alternation_error_each, visitor_data);
+}
+
+/**
+ * Parse any number of primitives joined by alternation and ended optionally by
+ * assignment.
+ *
+ * If first_node is NULL, the first primitive is parsed here using diag_id for
+ * its error; otherwise first_node is used as the left-most primitive. Each |
+ * that follows combines the result so far with the next primitive (or
+ * parenthesized pattern) into an alternation pattern node. When captures is
+ * non-empty, both sides of an alternation are scanned so that captured
+ * variables get reported as errors. Finally, each trailing `=> identifier`
+ * wraps the result in a capture pattern node.
+ *
+ * The depth parameter is the recursion depth; nested parses receive
+ * depth + 1.
+ */
+static pm_node_t *
+parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    pm_node_t *node = first_node;
+    bool alternation = false;
+
+    // The first condition forces one iteration when no first node was given.
+    // After that the loop only continues while a | is accepted.
+    while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
+        // The left side only needs scanning the first time it becomes part
+        // of an alternation. Once it is an alternation node, its parts have
+        // already been scanned.
+        if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
+            parse_pattern_alternation_error(parser, node);
+        }
+
+        switch (parser->current.type) {
+            case PM_TOKEN_IDENTIFIER:
+            case PM_TOKEN_BRACKET_LEFT_ARRAY:
+            case PM_TOKEN_BRACE_LEFT:
+            case PM_TOKEN_CARET:
+            case PM_TOKEN_CONSTANT:
+            case PM_TOKEN_UCOLON_COLON:
+            case PM_TOKEN_UDOT_DOT:
+            case PM_TOKEN_UDOT_DOT_DOT:
+            case PM_CASE_PRIMITIVE: {
+                if (!alternation) {
+                    node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
+                } else {
+                    pm_token_t operator = parser->previous;
+                    pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
+
+                    if (captures->size) parse_pattern_alternation_error(parser, right);
+                    node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
+                }
+
+                break;
+            }
+            case PM_TOKEN_PARENTHESIS_LEFT:
+            case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
+                // Grab the previous token before lexing the parenthesis. It
+                // is only used as the operator when we are in an alternation.
+                pm_token_t operator = parser->previous;
+                pm_token_t opening = parser->current;
+                parser_lex(parser);
+
+                pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
+                accept1(parser, PM_TOKEN_NEWLINE);
+                expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
+                pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
+
+                if (!alternation) {
+                    node = right;
+                } else {
+                    if (captures->size) parse_pattern_alternation_error(parser, right);
+                    node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
+                }
+
+                break;
+            }
+            default: {
+                // Nothing is consumed here, so in an alternation the previous
+                // token is still the one that was current when | was accepted.
+                pm_parser_err_current(parser, diag_id);
+                pm_node_t *right = UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+
+                if (!alternation) {
+                    node = right;
+                } else {
+                    if (captures->size) parse_pattern_alternation_error(parser, right);
+                    node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
+                }
+
+                break;
+            }
+        }
+    }
+
+    // If we have an =>, then we are assigning this pattern to a variable.
+    // In this case we should create an assignment node.
+    while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+        pm_token_t operator = parser->previous;
+        expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
+
+        pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
+
+        // This is the scope depth of the local, which is unrelated to the
+        // recursion depth parameter, so it gets its own name rather than
+        // shadowing that parameter.
+        int local_depth = pm_parser_local_depth_constant_id(parser, constant_id);
+        if (local_depth == -1) {
+            pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
+        }
+
+        parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+        pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
+            parser,
+            &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+            constant_id,
+            (uint32_t) (local_depth == -1 ? 0 : local_depth)
+        );
+
+        node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
+    }
+
+    return node;
+}
+
+/**
+ * Parse a pattern expression, which may be a hash, array, or find pattern.
+ */
+static pm_node_t *
+parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    pm_node_t *node = NULL;
+
+    bool leading_rest = false;
+    bool trailing_rest = false;
+
+    switch (parser->current.type) {
+        case PM_TOKEN_LABEL: {
+            parser_lex(parser);
+            pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+            node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
+
+            if (!(flags & PM_PARSE_PATTERN_TOP)) {
+                pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+            }
+
+            return node;
+        }
+        case PM_TOKEN_USTAR_STAR: {
+            node = parse_pattern_keyword_rest(parser, captures);
+            node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
+
+            if (!(flags & PM_PARSE_PATTERN_TOP)) {
+                pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+            }
+
+            return node;
+        }
+        case PM_TOKEN_STRING_BEGIN: {
+            // We need special handling for string beginnings because they could
+            // be dynamic symbols leading to hash patterns.
+            node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
+
+            if (pm_symbol_node_label_p(node)) {
+                node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
+
+                if (!(flags & PM_PARSE_PATTERN_TOP)) {
+                    pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
+                }
+
+                return node;
+            }
+
+            node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
+            break;
+        }
+        case PM_TOKEN_USTAR: {
+            if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
+                parser_lex(parser);
+                node = UP(parse_pattern_rest(parser, captures));
+                leading_rest = true;
+                break;
+            }
+        }
+        PRISM_FALLTHROUGH
+        default:
+            node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
+            break;
+    }
+
+    // A dynamic label symbol here begins a hash pattern as well. NOTE(review):
+    // unlike the cases above, PM_PARSE_PATTERN_TOP is not checked; confirm.
+    if (pm_symbol_node_label_p(node)) {
+        return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
+    }
+
+    if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
+        // If we have a comma, then we are now parsing either an array pattern
+        // or a find pattern. We need to parse all of the patterns, put them
+        // into a big list, and then determine which type of node we have.
+        pm_node_list_t nodes = { 0 };
+        pm_node_list_append(&nodes, node);
+
+        // Gather up all of the patterns into the list.
+        while (accept1(parser, PM_TOKEN_COMMA)) {
+            // A trailing comma acts as an implicit rest; record it and stop.
+            if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
+                node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+                pm_node_list_append(&nodes, node);
+                trailing_rest = true;
+                break;
+            }
+
+            if (accept1(parser, PM_TOKEN_USTAR)) {
+                node = UP(parse_pattern_rest(parser, captures));
+
+                // If we have already parsed a splat pattern, then this is an
+                // error. We will continue to parse the rest of the patterns,
+                // but we will indicate it as an error.
+                if (trailing_rest) {
+                    pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
+                }
+
+                trailing_rest = true;
+            } else {
+                node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+            }
+
+            pm_node_list_append(&nodes, node);
+        }
+
+        // If the first pattern and the last pattern are rest patterns, then we
+        // will call this a find pattern, regardless of how many rest patterns
+        // are in between because we know we already added the appropriate
+        // errors. Otherwise we will create an array pattern.
+        if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
+            node = UP(pm_find_pattern_node_create(parser, &nodes));
+
+            if (nodes.size == 2) {
+                pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
+            }
+        } else {
+            node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
+
+            if (leading_rest && trailing_rest) {
+                pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
+            }
+        }
+
+        xfree(nodes.nodes);
+    } else if (leading_rest) {
+        // Otherwise, if we parsed a single splat pattern, then we know we have
+        // an array pattern, so we can go ahead and create that node.
+        node = UP(pm_array_pattern_node_rest_create(parser, node));
+    }
+
+    return node;
+}
+
+/**
+ * Fold a leading minus sign into a numeric literal node. The node's start is
+ * moved back by one byte to cover the sign and its value is made negative.
+ * Imaginary nodes are widened too, then the numeric they wrap is updated.
+ */
+static inline void
+parse_negative_numeric(pm_node_t *node) {
+    // An imaginary node wraps another numeric node, so widen the wrapper
+    // and keep descending until we reach the underlying literal.
+    while (PM_NODE_TYPE_P(node, PM_IMAGINARY_NODE)) {
+        node->location.start--;
+        node = ((pm_imaginary_node_t *) node)->numeric;
+    }
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE:
+            node->location.start--;
+            ((pm_integer_node_t *) node)->value.negative = true;
+            break;
+        case PM_FLOAT_NODE: {
+            pm_float_node_t *float_node = (pm_float_node_t *) node;
+            node->location.start--;
+            float_node->value = -float_node->value;
+            break;
+        }
+        case PM_RATIONAL_NODE:
+            node->location.start--;
+            ((pm_rational_node_t *) node)->numerator.negative = true;
+            break;
+        default:
+            // Only numeric literal nodes are expected here.
+            assert(false && "unreachable");
+            break;
+    }
+}
+
+/**
+ * Append the error for the given diagnostic ID to the parser. Diagnostics whose
+ * messages take format arguments have them filled in here from the current or
+ * previous token; every other diagnostic goes to pm_parser_err_previous.
+ */
+static void
+pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+    switch (diag_id) {
+        case PM_ERR_EXPECT_ARGUMENT:
+        case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR:
+        case PM_ERR_HASH_VALUE:
+        case PM_ERR_UNARY_DISALLOWED: {
+            // Located at the current token and formatted with its type.
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+            return;
+        }
+        case PM_ERR_HASH_KEY: {
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
+            return;
+        }
+        case PM_ERR_UNARY_RECEIVER: {
+            // Located at the previous token; formatted with the type of the
+            // current token and the first byte of the previous one.
+            const char *found = parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type);
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, found, parser->previous.start[0]);
+            return;
+        }
+        default:
+            pm_parser_err_previous(parser, diag_id);
+            return;
+    }
+}
+
+/**
+ * Ensures that a retry node is valid by walking up the enclosing contexts.
+ */
+static void
+parse_retry(pm_parser_t *parser, const pm_node_t *node) {
+#define CONTEXT_NONE 0
+#define CONTEXT_THROUGH_ENSURE 1
+#define CONTEXT_THROUGH_ELSE 2
+
+    pm_context_node_t *context_node = parser->current_context;
+    int context = CONTEXT_NONE;
+
+    while (context_node != NULL) {
+        switch (context_node->context) {
+            case PM_CONTEXT_BEGIN_RESCUE:
+            case PM_CONTEXT_BLOCK_RESCUE:
+            case PM_CONTEXT_CLASS_RESCUE:
+            case PM_CONTEXT_DEF_RESCUE:
+            case PM_CONTEXT_LAMBDA_RESCUE:
+            case PM_CONTEXT_MODULE_RESCUE:
+            case PM_CONTEXT_SCLASS_RESCUE:
+            case PM_CONTEXT_DEFINED:
+            case PM_CONTEXT_RESCUE_MODIFIER:
+                // These are the good cases. We're allowed to have a retry here.
+                return;
+            case PM_CONTEXT_CLASS:
+            case PM_CONTEXT_DEF:
+            case PM_CONTEXT_DEF_PARAMS:
+            case PM_CONTEXT_MAIN:
+            case PM_CONTEXT_MODULE:
+            case PM_CONTEXT_PREEXE:
+            case PM_CONTEXT_SCLASS:
+                // These contexts end the walk without finding a rescue, so the
+                // retry is invalid. Pick the error based on what we crossed.
+                if (context == CONTEXT_NONE) {
+                    pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
+                } else if (context == CONTEXT_THROUGH_ENSURE) {
+                    pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
+                } else if (context == CONTEXT_THROUGH_ELSE) {
+                    pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
+                }
+                return;
+            case PM_CONTEXT_BEGIN_ELSE:
+            case PM_CONTEXT_BLOCK_ELSE:
+            case PM_CONTEXT_CLASS_ELSE:
+            case PM_CONTEXT_DEF_ELSE:
+            case PM_CONTEXT_LAMBDA_ELSE:
+            case PM_CONTEXT_MODULE_ELSE:
+            case PM_CONTEXT_SCLASS_ELSE:
+                // Crossing an else clause does not decide anything yet, but it
+                // selects the more specific error if the retry is invalid.
+                context = CONTEXT_THROUGH_ELSE;
+                break;
+            case PM_CONTEXT_BEGIN_ENSURE:
+            case PM_CONTEXT_BLOCK_ENSURE:
+            case PM_CONTEXT_CLASS_ENSURE:
+            case PM_CONTEXT_DEF_ENSURE:
+            case PM_CONTEXT_LAMBDA_ENSURE:
+            case PM_CONTEXT_MODULE_ENSURE:
+            case PM_CONTEXT_SCLASS_ENSURE:
+                // Crossing an ensure clause does not decide anything yet, but
+                // it selects the more specific error if the retry is invalid.
+                context = CONTEXT_THROUGH_ENSURE;
+                break;
+            case PM_CONTEXT_NONE:
+                // This case should never happen.
+                assert(false && "unreachable");
+                break;
+            case PM_CONTEXT_BEGIN:
+            case PM_CONTEXT_BLOCK_BRACES:
+            case PM_CONTEXT_BLOCK_KEYWORDS:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
+            case PM_CONTEXT_CASE_IN:
+            case PM_CONTEXT_CASE_WHEN:
+            case PM_CONTEXT_DEFAULT_PARAMS:
+            case PM_CONTEXT_ELSE:
+            case PM_CONTEXT_ELSIF:
+            case PM_CONTEXT_EMBEXPR:
+            case PM_CONTEXT_FOR_INDEX:
+            case PM_CONTEXT_FOR:
+            case PM_CONTEXT_IF:
+            case PM_CONTEXT_LAMBDA_BRACES:
+            case PM_CONTEXT_LAMBDA_DO_END:
+            case PM_CONTEXT_LOOP_PREDICATE:
+            case PM_CONTEXT_MULTI_TARGET:
+            case PM_CONTEXT_PARENS:
+            case PM_CONTEXT_POSTEXE:
+            case PM_CONTEXT_PREDICATE:
+            case PM_CONTEXT_TERNARY:
+            case PM_CONTEXT_UNLESS:
+            case PM_CONTEXT_UNTIL:
+            case PM_CONTEXT_WHILE:
+                // In these contexts we should continue walking up the list of
+                // contexts.
+                break;
+        }
+
+        context_node = context_node->prev;
+    }
+
+#undef CONTEXT_NONE
+#undef CONTEXT_THROUGH_ENSURE
+#undef CONTEXT_THROUGH_ELSE
+}
+
+/**
+ * Ensures that a yield node is valid by walking up the enclosing contexts.
+ */
+static void
+parse_yield(pm_parser_t *parser, const pm_node_t *node) {
+    pm_context_node_t *context_node = parser->current_context;
+
+    while (context_node != NULL) {
+        switch (context_node->context) {
+            case PM_CONTEXT_DEF:
+            case PM_CONTEXT_DEF_PARAMS:
+            case PM_CONTEXT_DEFINED:
+            case PM_CONTEXT_DEF_ENSURE:
+            case PM_CONTEXT_DEF_RESCUE:
+            case PM_CONTEXT_DEF_ELSE:
+                // These are the good cases. We're allowed to have a yield in
+                // these contexts.
+                return;
+            case PM_CONTEXT_CLASS:
+            case PM_CONTEXT_CLASS_ENSURE:
+            case PM_CONTEXT_CLASS_RESCUE:
+            case PM_CONTEXT_CLASS_ELSE:
+            case PM_CONTEXT_MAIN:
+            case PM_CONTEXT_MODULE:
+            case PM_CONTEXT_MODULE_ENSURE:
+            case PM_CONTEXT_MODULE_RESCUE:
+            case PM_CONTEXT_MODULE_ELSE:
+            case PM_CONTEXT_SCLASS:
+            case PM_CONTEXT_SCLASS_RESCUE:
+            case PM_CONTEXT_SCLASS_ENSURE:
+            case PM_CONTEXT_SCLASS_ELSE:
+                // These are the bad cases. We're not allowed to have a yield in
+                // these contexts.
+                pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
+                return;
+            case PM_CONTEXT_NONE:
+                // This case should never happen.
+                assert(false && "unreachable");
+                break;
+            case PM_CONTEXT_BEGIN:
+            case PM_CONTEXT_BEGIN_ELSE:
+            case PM_CONTEXT_BEGIN_ENSURE:
+            case PM_CONTEXT_BEGIN_RESCUE:
+            case PM_CONTEXT_BLOCK_BRACES:
+            case PM_CONTEXT_BLOCK_KEYWORDS:
+            case PM_CONTEXT_BLOCK_ELSE:
+            case PM_CONTEXT_BLOCK_ENSURE:
+            case PM_CONTEXT_BLOCK_PARAMETERS:
+            case PM_CONTEXT_BLOCK_RESCUE:
+            case PM_CONTEXT_CASE_IN:
+            case PM_CONTEXT_CASE_WHEN:
+            case PM_CONTEXT_DEFAULT_PARAMS:
+            case PM_CONTEXT_ELSE:
+            case PM_CONTEXT_ELSIF:
+            case PM_CONTEXT_EMBEXPR:
+            case PM_CONTEXT_FOR_INDEX:
+            case PM_CONTEXT_FOR:
+            case PM_CONTEXT_IF:
+            case PM_CONTEXT_LAMBDA_BRACES:
+            case PM_CONTEXT_LAMBDA_DO_END:
+            case PM_CONTEXT_LAMBDA_ELSE:
+            case PM_CONTEXT_LAMBDA_ENSURE:
+            case PM_CONTEXT_LAMBDA_RESCUE:
+            case PM_CONTEXT_LOOP_PREDICATE:
+            case PM_CONTEXT_MULTI_TARGET:
+            case PM_CONTEXT_PARENS:
+            case PM_CONTEXT_POSTEXE:
+            case PM_CONTEXT_PREDICATE:
+            case PM_CONTEXT_PREEXE:
+            case PM_CONTEXT_RESCUE_MODIFIER:
+            case PM_CONTEXT_TERNARY:
+            case PM_CONTEXT_UNLESS:
+            case PM_CONTEXT_UNTIL:
+            case PM_CONTEXT_WHILE:
+                // In these contexts we should continue walking up the list of
+                // contexts.
+                break;
+        }
+
+        context_node = context_node->prev;
+    }
+}
+
+/**
+ * Context handed to parse_regular_expression_error through the opaque data
+ * pointer that the regular expression parser passes to its error callback.
+ */
+typedef struct {
+    /** The parser that regular expression errors are reported on. */
+    pm_parser_t *parser;
+
+    /** The start of the regular expression node's location. */
+    const uint8_t *start;
+
+    /** The end of the regular expression node's location. */
+    const uint8_t *end;
+
+    /**
+     * Whether or not the unescaped source of the regular expression is a
+     * shared string. If it is, errors use the location reported by the
+     * regular expression parser directly. If it is not, errors use the bounds
+     * of the regular expression itself (the start and end fields above).
+     */
+    bool shared;
+} parse_regular_expression_error_data_t;
+
+/**
+ * Error callback handed to the regular expression parser. Each syntax error
+ * that it reports is added to the parser stored in the callback data.
+ */
+static void
+parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
+    const parse_regular_expression_error_data_t *context = data;
+
+    // When the regular expression's source is shared, the bounds passed in
+    // by the regular expression parser are used directly. Otherwise we
+    // report the error on the bounds of the regular expression itself, as
+    // recorded in the callback data.
+    const uint8_t *error_start = context->shared ? start : context->start;
+    const uint8_t *error_end = context->shared ? end : context->end;
+
+    PM_PARSER_ERR_FORMAT(context->parser, error_start, error_end, PM_ERR_REGEXP_PARSE_ERROR, message);
+}
+
+/**
+ * Run the regular expression parser over the unescaped contents of the node so
+ * that any syntax errors it reports are added to the parser.
+ */
+static void
+parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
+    parse_regular_expression_error_data_t callback_data;
+    callback_data.parser = parser;
+    callback_data.start = node->base.location.start;
+    callback_data.end = node->base.location.end;
+    callback_data.shared = (node->unescaped.type == PM_STRING_SHARED);
+
+    const pm_string_t *source = &node->unescaped;
+    pm_regexp_parse(parser, pm_string_source(source), pm_string_length(source), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &callback_data);
+}
+
+/**
+ * Parse an expression that begins with the previous node that we just lexed.
+ */
+static inline pm_node_t *
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    switch (parser->current.type) {
+        case PM_TOKEN_BRACKET_LEFT_ARRAY: {
+            parser_lex(parser);
+
+            pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
+            pm_accepts_block_stack_push(parser, true);
+            bool parsed_bare_hash = false;
+
+            while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
+                bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
+
+                // Handle the case where we don't have a comma and we have a
+                // newline followed by a right bracket.
+                if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+                    break;
+                }
+
+                // Ensure that we have a comma between elements in the array.
+                if (array->elements.size > 0) {
+                    if (accept1(parser, PM_TOKEN_COMMA)) {
+                        // If there was a comma but we also accepts a newline,
+                        // then this is a syntax error.
+                        if (accepted_newline) {
+                            pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+                        }
+                    } else {
+                        // If there was no comma, then we need to add a syntax
+                        // error.
+                        const uint8_t *location = parser->previous.end;
+                        PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
+
+                        parser->previous.start = location;
+                        parser->previous.type = PM_TOKEN_MISSING;
+                    }
+                }
+
+                // If we have a right bracket immediately following a comma,
+                // this is allowed since it's a trailing comma. In this case we
+                // can break out of the loop.
+                if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
+
+                pm_node_t *element;
+
+                if (accept1(parser, PM_TOKEN_USTAR)) {
+                    pm_token_t operator = parser->previous;
+                    pm_node_t *expression = NULL;
+
+                    if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
+                        pm_parser_scope_forwarding_positionals_check(parser, &operator);
+                    } else {
+                        expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                    }
+
+                    element = UP(pm_splat_node_create(parser, &operator, expression));
+                } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
+                    if (parsed_bare_hash) {
+                        pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
+                    }
+
+                    element = UP(pm_keyword_hash_node_create(parser));
+                    pm_static_literals_t hash_keys = { 0 };
+
+                    if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                        parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
+                    }
+
+                    pm_static_literals_free(&hash_keys);
+                    parsed_bare_hash = true;
+                } else {
+                    element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
+
+                    if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+                        if (parsed_bare_hash) {
+                            pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
+                        }
+
+                        pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
+                        pm_static_literals_t hash_keys = { 0 };
+                        pm_hash_key_static_literals_add(parser, &hash_keys, element);
+
+                        pm_token_t operator;
+                        if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
+                            operator = parser->previous;
+                        } else {
+                            operator = not_provided(parser);
+                        }
+
+                        pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+                        pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, &operator, value));
+                        pm_keyword_hash_node_elements_append(hash, assoc);
+
+                        element = UP(hash);
+                        if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+                            parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
+                        }
+
+                        pm_static_literals_free(&hash_keys);
+                        parsed_bare_hash = true;
+                    }
+                }
+
+                pm_array_node_elements_append(array, element);
+                if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+            }
+
+            accept1(parser, PM_TOKEN_NEWLINE);
+
+            if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+                parser->previous.start = parser->previous.end;
+                parser->previous.type = PM_TOKEN_MISSING;
+            }
+
+            pm_array_node_close_set(array, &parser->previous);
+            pm_accepts_block_stack_pop(parser);
+
+            return UP(array);
+        }
+        case PM_TOKEN_PARENTHESIS_LEFT:
+        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
+            pm_token_t opening = parser->current;
+            pm_node_flags_t flags = 0;
+
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+            parser_lex(parser);
+            while (true) {
+                if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+                    flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+                } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+                    break;
+                }
+            }
+
+            // If this is the end of the file or we match a right parenthesis, then
+            // we have an empty parentheses node, and we can immediately return.
+            if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
+                expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+                pop_block_exits(parser, previous_block_exits);
+                pm_node_list_free(&current_block_exits);
+
+                return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags));
+            }
+
+            // Otherwise, we're going to parse the first statement in the list
+            // of statements within the parentheses.
+            pm_accepts_block_stack_push(parser, true);
+            context_push(parser, PM_CONTEXT_PARENS);
+            pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+            context_pop(parser);
+
+            // Determine if this statement is followed by a terminator. In the
+            // case of a single statement, this is fine. But in the case of
+            // multiple statements it's required.
+            bool terminator_found = false;
+
+            if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+                terminator_found = true;
+                flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+            } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
+                terminator_found = true;
+            }
+
+            if (terminator_found) {
+                while (true) {
+                    if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+                        flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+                    } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+                        break;
+                    }
+                }
+            }
+
+            // If we hit a right parenthesis, then we're done parsing the
+            // parentheses node, and we can check which kind of node we should
+            // return.
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
+                    lex_state_set(parser, PM_LEX_STATE_ENDARG);
+                }
+
+                parser_lex(parser);
+                pm_accepts_block_stack_pop(parser);
+
+                pop_block_exits(parser, previous_block_exits);
+                pm_node_list_free(&current_block_exits);
+
+                if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+                    // If we have a single statement and are ending on a right
+                    // parenthesis, then we need to check if this is possibly a
+                    // multiple target node.
+                    pm_multi_target_node_t *multi_target;
+
+                    if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
+                        multi_target = (pm_multi_target_node_t *) statement;
+                    } else {
+                        multi_target = pm_multi_target_node_create(parser);
+                        pm_multi_target_node_targets_append(parser, multi_target, statement);
+                    }
+
+                    pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                    pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+
+                    multi_target->lparen_loc = lparen_loc;
+                    multi_target->rparen_loc = rparen_loc;
+                    multi_target->base.location.start = lparen_loc.start;
+                    multi_target->base.location.end = rparen_loc.end;
+
+                    pm_node_t *result;
+                    if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
+                        result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                        accept1(parser, PM_TOKEN_NEWLINE);
+                    } else {
+                        result = UP(multi_target);
+                    }
+
+                    if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+                        // All set, this is explicitly allowed by the parent
+                        // context.
+                    } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
+                        // All set, we're inside a for loop and we're parsing
+                        // multiple targets.
+                    } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                        // Multi targets are not allowed when it's not a
+                        // statement level.
+                        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+                    } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                        // Multi targets must be followed by an equal sign in
+                        // order to be valid (or a right parenthesis if they are
+                        // nested).
+                        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+                    }
+
+                    return result;
+                }
+
+                // If we have a single statement and are ending on a right parenthesis
+                // and we didn't return a multiple assignment node, then we can return a
+                // regular parentheses node now.
+                pm_statements_node_t *statements = pm_statements_node_create(parser);
+                pm_statements_node_body_append(parser, statements, statement, true);
+
+                return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
+            }
+
+            // If we have more than one statement in the set of parentheses,
+            // then we are going to parse all of them as a list of statements.
+            // We'll do that here.
+            context_push(parser, PM_CONTEXT_PARENS);
+            flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+
+            pm_statements_node_t *statements = pm_statements_node_create(parser);
+            pm_statements_node_body_append(parser, statements, statement, true);
+
+            // If we didn't find a terminator and we didn't find a right
+            // parenthesis, then this is a syntax error.
+            if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+            }
+
+            // Parse each statement within the parentheses.
+            while (true) {
+                pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+                pm_statements_node_body_append(parser, statements, node, true);
+
+                // If we're recovering from a syntax error, then we need to stop
+                // parsing the statements now.
+                if (parser->recovering) {
+                    // If this is the level of context where the recovery has
+                    // happened, then we can mark the parser as done recovering.
+                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
+                    break;
+                }
+
+                // If we couldn't parse an expression at all, then we need to
+                // bail out of the loop.
+                if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
+
+                // If we successfully parsed a statement, then we are going to
+                // need a terminator to delimit them.
+                if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                    while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
+                } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                    break;
+                } else if (!match1(parser, PM_TOKEN_EOF)) {
+                    // If we're at the end of the file, then we're going to add
+                    // an error after this for the ) anyway.
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+                }
+            }
+
+            context_pop(parser);
+            pm_accepts_block_stack_pop(parser);
+            expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+            // When we're parsing multi targets, we allow them to be followed by
+            // a right parenthesis if they are at the statement level. This is
+            // only possible if they are the final statement in a parentheses.
+            // We need to explicitly reject that here.
+            {
+                pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
+
+                if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+                    pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+                    pm_multi_target_node_targets_append(parser, multi_target, statement);
+
+                    statement = UP(multi_target);
+                    statements->body.nodes[statements->body.size - 1] = statement;
+                }
+
+                if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
+                    const uint8_t *offset = statement->location.end;
+                    pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
+                    pm_node_t *value = UP(pm_missing_node_create(parser, offset, offset));
+
+                    statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
+                    statements->body.nodes[statements->body.size - 1] = statement;
+
+                    pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
+                }
+            }
+
+            pop_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            pm_void_statements_check(parser, statements, true);
+            return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, flags));
+        }
+        case PM_TOKEN_BRACE_LEFT: {
+            // If we were passed a current_hash_keys via the parser, then that
+            // means we're already parsing a hash and we want to share the set
+            // of hash keys with this inner hash we're about to parse for the
+            // sake of warnings. We'll set it to NULL after we grab it to make
+            // sure subsequent expressions don't use it. Effectively this is a
+            // way of getting around passing it to every call to
+            // parse_expression.
+            pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
+            parser->current_hash_keys = NULL;
+
+            pm_accepts_block_stack_push(parser, true);
+            parser_lex(parser);
+
+            pm_token_t opening = parser->previous;
+            pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
+
+            if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
+                if (current_hash_keys != NULL) {
+                    parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
+                } else {
+                    pm_static_literals_t hash_keys = { 0 };
+                    parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
+                    pm_static_literals_free(&hash_keys);
+                }
+
+                accept1(parser, PM_TOKEN_NEWLINE);
+            }
+
+            pm_accepts_block_stack_pop(parser);
+            expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
+            pm_hash_node_closing_loc_set(node, &parser->previous);
+
+            return UP(node);
+        }
+        case PM_TOKEN_CHARACTER_LITERAL: {
+            pm_token_t closing = not_provided(parser);
+            pm_node_t *node = UP(pm_string_node_create_current_string(
+                parser,
+                &(pm_token_t) {
+                    .type = PM_TOKEN_STRING_BEGIN,
+                    .start = parser->current.start,
+                    .end = parser->current.start + 1
+                },
+                &(pm_token_t) {
+                    .type = PM_TOKEN_STRING_CONTENT,
+                    .start = parser->current.start + 1,
+                    .end = parser->current.end
+                },
+                &closing
+            ));
+
+            pm_node_flag_set(node, parse_unescaped_encoding(parser));
+
+            // Skip past the character literal here, since now we have handled
+            // parser->explicit_encoding correctly.
+            parser_lex(parser);
+
+            // Characters can be followed by strings in which case they are
+            // automatically concatenated.
+            if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+                return parse_strings(parser, node, false, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_CLASS_VARIABLE: {
+            parser_lex(parser);
+            pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
+
+            if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_CONSTANT: {
+            parser_lex(parser);
+            pm_token_t constant = parser->previous;
+
+            // If a constant is immediately followed by parentheses, then this is in
+            // fact a method call, not a constant read.
+            if (
+                match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
+                (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+                (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
+                match1(parser, PM_TOKEN_BRACE_LEFT)
+            ) {
+                pm_arguments_t arguments = { 0 };
+                parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+                return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
+            }
+
+            pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
+
+            if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
+                // If we get here, then we have a comma immediately following a
+                // constant, so we're going to parse this as a multiple assignment.
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_UCOLON_COLON: {
+            parser_lex(parser);
+            pm_token_t delimiter = parser->previous;
+
+            expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+            pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
+
+            if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_UDOT_DOT:
+        case PM_TOKEN_UDOT_DOT_DOT: {
+            pm_token_t operator = parser->current;
+            parser_lex(parser);
+
+            pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+
+            // Unary .. and ... are special because these are non-associative
+            // operators that can also be unary operators. In this case we need
+            // to explicitly reject code that has a .. or ... that follows this
+            // expression.
+            if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
+                pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
+            }
+
+            return UP(pm_range_node_create(parser, NULL, &operator, right));
+        }
+        case PM_TOKEN_FLOAT:
+            parser_lex(parser);
+            return UP(pm_float_node_create(parser, &parser->previous));
+        case PM_TOKEN_FLOAT_IMAGINARY:
+            parser_lex(parser);
+            return UP(pm_float_node_imaginary_create(parser, &parser->previous));
+        case PM_TOKEN_FLOAT_RATIONAL:
+            parser_lex(parser);
+            return UP(pm_float_node_rational_create(parser, &parser->previous));
+        case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+            parser_lex(parser);
+            return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
+        case PM_TOKEN_NUMBERED_REFERENCE: {
+            parser_lex(parser);
+            pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
+
+            if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_GLOBAL_VARIABLE: {
+            parser_lex(parser);
+            pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
+
+            if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_BACK_REFERENCE: {
+            parser_lex(parser);
+            pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
+
+            if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_IDENTIFIER:
+        case PM_TOKEN_METHOD_NAME: {
+            parser_lex(parser);
+            pm_token_t identifier = parser->previous;
+            pm_node_t *node = parse_variable_call(parser);
+
+            if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
+                // If parse_variable_call returned with a call node, then we
+                // know the identifier is not in the local table. In that case
+                // we need to check if there are arguments following the
+                // identifier.
+                pm_call_node_t *call = (pm_call_node_t *) node;
+                pm_arguments_t arguments = { 0 };
+
+                if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
+                    // Since we found arguments, we need to turn off the
+                    // variable call bit in the flags.
+                    pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
+
+                    call->opening_loc = arguments.opening_loc;
+                    call->arguments = arguments.arguments;
+                    call->closing_loc = arguments.closing_loc;
+                    call->block = arguments.block;
+
+                    const uint8_t *end = pm_arguments_end(&arguments);
+                    if (!end) {
+                        end = call->message_loc.end;
+                    }
+                    call->base.location.end = end;
+                }
+            } else {
+                // Otherwise, we know the identifier is in the local table. This
+                // can still be a method call if it is followed by arguments or
+                // a block, so we need to check for that here.
+                if (
+                    (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+                    (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
+                    match1(parser, PM_TOKEN_BRACE_LEFT)
+                ) {
+                    pm_arguments_t arguments = { 0 };
+                    parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+                    pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
+
+                    if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+                        // If we're about to convert an 'it' implicit local
+                        // variable read into a method call, we need to remove
+                        // it from the list of implicit local variables.
+                        pm_node_unreference(parser, node);
+                    } else {
+                        // Otherwise, we're about to convert a regular local
+                        // variable read into a method call, in which case we
+                        // need to indicate that this was not a read for the
+                        // purposes of warnings.
+                        assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
+
+                        if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
+                            pm_node_unreference(parser, node);
+                        } else {
+                            pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
+                            pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
+                        }
+                    }
+
+                    pm_node_destroy(parser, node);
+                    return UP(fcall);
+                }
+            }
+
+            if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_HEREDOC_START: {
+            // Here we have found a heredoc. We'll parse it and add it to the
+            // list of strings.
+            assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
+            pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
+
+            size_t common_whitespace = (size_t) -1;
+            parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
+
+            parser_lex(parser);
+            pm_token_t opening = parser->previous;
+
+            pm_node_t *node;
+            pm_node_t *part;
+
+            if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
+                // If we get here, then we have an empty heredoc. We'll create
+                // an empty content token and return an empty string node.
+                expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
+                pm_token_t content = parse_strings_empty_content(parser->previous.start);
+
+                if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
+                    node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
+                } else {
+                    node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
+                }
+
+                node->location.end = opening.end;
+            } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
+                // If we get here, then we tried to find something in the
+                // heredoc but couldn't actually parse anything, so we'll just
+                // return a missing node.
+                //
+                // parse_string_part handles its own errors, so there is no need
+                // for us to add one here.
+                node = UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
+            } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
+                // If we get here, then the part that we parsed was plain string
+                // content and we're at the end of the heredoc, so we can return
+                // just a string node with the heredoc opening and closing as
+                // its opening and closing.
+                pm_node_flag_set(part, parse_unescaped_encoding(parser));
+                pm_string_node_t *cast = (pm_string_node_t *) part;
+
+                cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
+                cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
+                cast->base.location = cast->opening_loc;
+
+                if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
+                    assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
+                    cast->base.type = PM_X_STRING_NODE;
+                }
+
+                if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
+                    parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
+                }
+
+                node = UP(cast);
+                expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
+            } else {
+                // If we get here, then we have multiple parts in the heredoc,
+                // so we'll need to create an interpolated string node to hold
+                // them all.
+                pm_node_list_t parts = { 0 };
+                pm_node_list_append(&parts, part);
+
+                while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
+                    if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+                        pm_node_list_append(&parts, part);
+                    }
+                }
+
+                // Now that we have all of the parts, create the correct type of
+                // interpolated node.
+                if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
+                    pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
+                    cast->parts = parts;
+
+                    expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
+                    pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
+
+                    cast->base.location = cast->opening_loc;
+                    node = UP(cast);
+                } else {
+                    pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
+                    pm_node_list_free(&parts);
+
+                    expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
+                    pm_interpolated_string_node_closing_set(cast, &parser->previous);
+
+                    cast->base.location = cast->opening_loc;
+                    node = UP(cast);
+                }
+
+                // If this is a heredoc that is indented with a ~, then we need
+                // to dedent each line by the common leading whitespace.
+                if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
+                    pm_node_list_t *nodes;
+                    if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
+                        nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
+                    } else {
+                        nodes = &((pm_interpolated_string_node_t *) node)->parts;
+                    }
+
+                    parse_heredoc_dedent(parser, nodes, common_whitespace);
+                }
+            }
+
+            if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
+                return parse_strings(parser, node, false, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_INSTANCE_VARIABLE: {
+            parser_lex(parser);
+            pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
+
+            if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            return node;
+        }
+        case PM_TOKEN_INTEGER: {
+            pm_node_flags_t base = parser->integer_base;
+            parser_lex(parser);
+            return UP(pm_integer_node_create(parser, base, &parser->previous));
+        }
+        case PM_TOKEN_INTEGER_IMAGINARY: {
+            pm_node_flags_t base = parser->integer_base;
+            parser_lex(parser);
+            return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
+        }
+        case PM_TOKEN_INTEGER_RATIONAL: {
+            pm_node_flags_t base = parser->integer_base;
+            parser_lex(parser);
+            return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
+        }
+        case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
+            pm_node_flags_t base = parser->integer_base;
+            parser_lex(parser);
+            return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
+        }
+        case PM_TOKEN_KEYWORD___ENCODING__:
+            parser_lex(parser);
+            return UP(pm_source_encoding_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD___FILE__:
+            parser_lex(parser);
+            return UP(pm_source_file_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD___LINE__:
+            parser_lex(parser);
+            return UP(pm_source_line_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD_ALIAS: {
+            if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
+            }
+
+            parser_lex(parser);
+            pm_token_t keyword = parser->previous;
+
+            pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
+            pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
+
+            switch (PM_NODE_TYPE(new_name)) {
+                case PM_BACK_REFERENCE_READ_NODE:
+                case PM_NUMBERED_REFERENCE_READ_NODE:
+                case PM_GLOBAL_VARIABLE_READ_NODE: {
+                    if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
+                        if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
+                            pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
+                        }
+                    } else {
+                        pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+                    }
+
+                    return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
+                }
+                case PM_SYMBOL_NODE:
+                case PM_INTERPOLATED_SYMBOL_NODE: {
+                    if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
+                        pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+                    }
+                }
+                PRISM_FALLTHROUGH
+                default:
+                    return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
+            }
+        }
+        case PM_TOKEN_KEYWORD_CASE: {
+            size_t opening_newline_index = token_newline_index(parser);
+            parser_lex(parser);
+
+            pm_token_t case_keyword = parser->previous;
+            pm_node_t *predicate = NULL;
+
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+                predicate = NULL;
+            } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
+                predicate = NULL;
+             } else if (!token_begins_expression_p(parser->current.type)) {
+                predicate = NULL;
+            } else {
+                predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
+                while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+            }
+
+            if (match1(parser, PM_TOKEN_KEYWORD_END)) {
+                parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+                parser_lex(parser);
+
+                pop_block_exits(parser, previous_block_exits);
+                pm_node_list_free(&current_block_exits);
+
+                pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+                return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
+            }
+
+            // At this point we can create a case node, though we don't yet know
+            // if it is a case-in or case-when node.
+            pm_token_t end_keyword = not_provided(parser);
+            pm_node_t *node;
+
+            if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+                pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
+                pm_static_literals_t literals = { 0 };
+
+                // At this point we've seen a when keyword, so we know this is a
+                // case-when node. We will continue to parse the when nodes
+                // until we hit the end of the list.
+                while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+                    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+                    parser_lex(parser);
+
+                    pm_token_t when_keyword = parser->previous;
+                    pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
+
+                    do {
+                        if (accept1(parser, PM_TOKEN_USTAR)) {
+                            pm_token_t operator = parser->previous;
+                            pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+
+                            pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
+                            pm_when_node_conditions_append(when_node, UP(splat_node));
+
+                            if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
+                        } else {
+                            pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
+                            pm_when_node_conditions_append(when_node, condition);
+
+                            // If we found a missing node, then this is a syntax
+                            // error and we should stop looping.
+                            if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
+
+                            // If this is a string node, then we need to mark it
+                            // as frozen because when clause strings are frozen.
+                            if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
+                                pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+                            } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+                                pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
+                            }
+
+                            pm_when_clause_static_literals_add(parser, &literals, condition);
+                        }
+                    } while (accept1(parser, PM_TOKEN_COMMA));
+
+                    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                        if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+                            pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
+                        }
+                    } else {
+                        expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+                        pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
+                    }
+
+                    if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                        pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
+                        if (statements != NULL) {
+                            pm_when_node_statements_set(when_node, statements);
+                        }
+                    }
+
+                    pm_case_node_condition_append(case_node, UP(when_node));
+                }
+
+                // If we didn't parse any conditions (in or when) then we need
+                // to indicate that we have an error.
+                if (case_node->conditions.size == 0) {
+                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+                }
+
+                pm_static_literals_free(&literals);
+                node = UP(case_node);
+            } else {
+                pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
+
+                // If this is a case-match node (i.e., it is a pattern matching
+                // case statement) then we must have a predicate.
+                if (predicate == NULL) {
+                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
+                }
+
+                // At this point we expect that we're parsing a case-in node. We
+                // will continue to parse the in nodes until we hit the end of
+                // the list.
+                while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
+                    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+
+                    bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+                    parser->pattern_matching_newlines = true;
+
+                    lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+                    parser->command_start = false;
+                    parser_lex(parser);
+
+                    pm_token_t in_keyword = parser->previous;
+
+                    pm_constant_id_list_t captures = { 0 };
+                    pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
+                    parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+                    pm_constant_id_list_free(&captures);
+
+                    // Since we're in the top-level of the case-in node we need
+                    // to check for guard clauses in the form of `if` or
+                    // `unless` statements.
+                    if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
+                        pm_token_t keyword = parser->previous;
+                        pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+                        pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
+                    } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
+                        pm_token_t keyword = parser->previous;
+                        pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+                        pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
+                    }
+
+                    // Now we need to check for the terminator of the in node's
+                    // pattern. It can be a newline or semicolon optionally
+                    // followed by a `then` keyword.
+                    pm_token_t then_keyword;
+                    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                        if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+                            then_keyword = parser->previous;
+                        } else {
+                            then_keyword = not_provided(parser);
+                        }
+                    } else {
+                        expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
+                        then_keyword = parser->previous;
+                    }
+
+                    // Now we can actually parse the statements associated with
+                    // the in node.
+                    pm_statements_node_t *statements;
+                    if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                        statements = NULL;
+                    } else {
+                        statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
+                    }
+
+                    // Now that we have the full pattern and statements, we can
+                    // create the node and attach it to the case node.
+                    pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword));
+                    pm_case_match_node_condition_append(case_node, condition);
+                }
+
+                // If we didn't parse any conditions (in or when) then we need
+                // to indicate that we have an error.
+                if (case_node->conditions.size == 0) {
+                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+                }
+
+                node = UP(case_node);
+            }
+
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+            if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+                pm_token_t else_keyword = parser->previous;
+                pm_else_node_t *else_node;
+
+                if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+                    else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
+                } else {
+                    else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
+                }
+
+                if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+                    pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
+                } else {
+                    pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
+                }
+            }
+
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
+
+            if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+                pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
+            } else {
+                pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
+            }
+
+            pop_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return node;
+        }
+        case PM_TOKEN_KEYWORD_BEGIN: { // explicit `begin` ... `end` expression
+            size_t opening_newline_index = token_newline_index(parser); // read while `begin` is still the current token
+            parser_lex(parser);
+
+            pm_token_t begin_keyword = parser->previous;
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+            // push_block_exits receives the empty local list; the pointer it returns is handed back to pop_block_exits below.
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+            pm_statements_node_t *begin_statements = NULL;
+            // The body is parsed only when the next token is not rescue/ensure/else/end; otherwise begin_statements stays NULL.
+            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                pm_accepts_block_stack_push(parser, true);
+                begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
+                pm_accepts_block_stack_pop(parser);
+                accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+            }
+            // NOTE(review): parse_rescues presumably attaches any rescue/else/ensure clauses to begin_node -- confirm.
+            pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
+            parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
+            // The node's end and end keyword come from parser->previous (presumably the `end` just matched; the failure path of expect1_opening is not visible here).
+            begin_node->base.location.end = parser->previous.end;
+            pm_begin_node_end_keyword_set(begin_node, &parser->previous);
+
+            pop_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return UP(begin_node);
+        }
+        case PM_TOKEN_KEYWORD_BEGIN_UPCASE: { // `BEGIN { ... }`, built as a pre-execution node
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+            // The error is reported on the current token (the keyword is not consumed yet) when not at statement binding power.
+            if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
+            }
+
+            parser_lex(parser);
+            pm_token_t keyword = parser->previous;
+
+            expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
+            pm_token_t opening = parser->previous;
+            pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
+
+            expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
+            pm_context_t context = parser->current_context->context; // inspected only after the body has been parsed
+            if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
+                pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
+            }
+            // This arm ends with flush_block_exits rather than pop_block_exits. NOTE(review): the difference is defined outside this view.
+            flush_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
+        }
+        case PM_TOKEN_KEYWORD_BREAK:
+        case PM_TOKEN_KEYWORD_NEXT:
+        case PM_TOKEN_KEYWORD_RETURN: { // shared arm; the three keywords diverge only in the final switch on keyword.type
+            parser_lex(parser);
+
+            pm_token_t keyword = parser->previous;
+            pm_arguments_t arguments = { 0 };
+            // Arguments are considered only if the next token can begin an expression or is PM_TOKEN_USTAR / PM_TOKEN_USTAR_STAR.
+            if (
+                token_begins_expression_p(parser->current.type) ||
+                match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
+            ) {
+                pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left; // NOTE: shadows the outer binding_power
+                // NOTE(review): this check presumably keeps low-precedence operator tokens from being taken as arguments -- confirm.
+                if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
+                    pm_token_t next = parser->current;
+                    parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+
+                    // Reject `foo && return bar`.
+                    if (!accepts_command_call && arguments.arguments != NULL) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(next.type));
+                    }
+                }
+            }
+            // break/next nodes go through parse_block_exit unless partial_script is set; return nodes always go through parse_return.
+            switch (keyword.type) {
+                case PM_TOKEN_KEYWORD_BREAK: {
+                    pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
+                    if (!parser->partial_script) parse_block_exit(parser, node);
+                    return node;
+                }
+                case PM_TOKEN_KEYWORD_NEXT: {
+                    pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
+                    if (!parser->partial_script) parse_block_exit(parser, node);
+                    return node;
+                }
+                case PM_TOKEN_KEYWORD_RETURN: {
+                    pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
+                    parse_return(parser, node);
+                    return node;
+                }
+                default:
+                    assert(false && "unreachable"); // compiled out under NDEBUG, hence the fallback return below
+                    return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
+            }
+        }
+        case PM_TOKEN_KEYWORD_SUPER: { // `super`: builds either a forwarding super node or a super node
+            parser_lex(parser);
+
+            pm_token_t keyword = parser->previous;
+            pm_arguments_t arguments = { 0 };
+            parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+            // Forwarding form: no opening location recorded, no arguments, and the block (if any) is a PM_BLOCK_NODE.
+            if (
+                arguments.opening_loc.start == NULL &&
+                arguments.arguments == NULL &&
+                ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
+            ) {
+                return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
+            }
+            // Otherwise an opening location, arguments, or a block that is not a PM_BLOCK_NODE was recorded.
+            return UP(pm_super_node_create(parser, &keyword, &arguments));
+        }
+        case PM_TOKEN_KEYWORD_YIELD: { // `yield`: arguments are kept, a block argument is rejected
+            parser_lex(parser);
+
+            pm_token_t keyword = parser->previous;
+            pm_arguments_t arguments = { 0 };
+            parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
+
+            // It's possible that we've parsed a block argument through our
+            // call to parse_arguments_list. If we found one, we should mark it
+            // as invalid and destroy it, as we don't have a place for it on the
+            // yield node.
+            if (arguments.block != NULL) {
+                pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
+                pm_node_unreference(parser, arguments.block);
+                pm_node_destroy(parser, arguments.block);
+                arguments.block = NULL;
+            }
+            // Only the opening/closing locations and the argument list are passed on to the yield node.
+            pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
+            if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node); // skipped when parsing_eval or partial_script is set
+
+            return node;
+        }
+        case PM_TOKEN_KEYWORD_CLASS: { // `class << expression` (singleton class node) or `class Name [< Superclass]` (class node)
+            size_t opening_newline_index = token_newline_index(parser);
+            parser_lex(parser);
+
+            pm_token_t class_keyword = parser->previous;
+            pm_do_loop_stack_push(parser, false); // popped again on both return paths below
+
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+            // Singleton form: the keyword is followed by `<<`.
+            if (accept1(parser, PM_TOKEN_LESS_LESS)) {
+                pm_token_t operator = parser->previous;
+                pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
+                // The new scope is pushed only after the expression is parsed. The delimiter is peeked at with match2, not consumed here.
+                pm_parser_scope_push(parser, true);
+                if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
+                }
+
+                pm_node_t *statements = NULL;
+                if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                    pm_accepts_block_stack_push(parser, true);
+                    statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
+                    pm_accepts_block_stack_pop(parser);
+                }
+                // On rescue/ensure the body is replaced by the result of parse_rescues_implicit_begin; otherwise only the indentation check runs.
+                if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+                    assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+                    statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
+                } else {
+                    parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+                }
+
+                expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+                // pm_locals_order is called on the current scope's locals before that scope is popped.
+                pm_constant_id_list_t locals;
+                pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+                pm_parser_scope_pop(parser);
+                pm_do_loop_stack_pop(parser);
+                // This path ends with flush_block_exits; the regular class path below uses pop_block_exits.
+                flush_block_exits(parser, previous_block_exits);
+                pm_node_list_free(&current_block_exits);
+
+                return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
+            }
+            // Regular form. `name` is parser->previous as left by parsing the constant path, and must be a PM_TOKEN_CONSTANT.
+            pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
+            pm_token_t name = parser->previous;
+            if (name.type != PM_TOKEN_CONSTANT) {
+                pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
+            }
+
+            pm_token_t inheritance_operator;
+            pm_node_t *superclass;
+            // The `<` token is saved while it is still current, then lexed past with the BEG lex state and command_start set.
+            if (match1(parser, PM_TOKEN_LESS)) {
+                inheritance_operator = parser->current;
+                lex_state_set(parser, PM_LEX_STATE_BEG);
+
+                parser->command_start = true;
+                parser_lex(parser);
+
+                superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
+            } else {
+                inheritance_operator = not_provided(parser);
+                superclass = NULL;
+            }
+            // As in the singleton form, the scope is pushed after the header expressions have been parsed.
+            pm_parser_scope_push(parser, true);
+            // A newline or semicolon is required after a superclass and optional otherwise.
+            if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
+                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
+            } else {
+                accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+            }
+            pm_node_t *statements = NULL;
+
+            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                pm_accepts_block_stack_push(parser, true);
+                statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
+                pm_accepts_block_stack_pop(parser);
+            }
+
+            if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+                statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
+            } else {
+                parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+            }
+
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+            // NOTE(review): context_def_p presumably reports being inside a method definition (cf. PM_ERR_CLASS_IN_METHOD) -- confirm.
+            if (context_def_p(parser)) {
+                pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
+            }
+
+            pm_constant_id_list_t locals;
+            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+            pm_parser_scope_pop(parser);
+            pm_do_loop_stack_pop(parser);
+            // The parsed path must be a constant read or constant path node; anything else is reported as PM_ERR_CLASS_NAME.
+            if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
+                pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
+            }
+
+            pop_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous));
+        }
+        case PM_TOKEN_KEYWORD_DEF: {
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+            pm_token_t def_keyword = parser->current;
+            size_t opening_newline_index = token_newline_index(parser);
+
+            pm_node_t *receiver = NULL;
+            pm_token_t operator = not_provided(parser);
+            pm_token_t name;
+
+            // This context is necessary for lexing `...` in bare params
+            // correctly. It must be pushed before lexing the first param, so it
+            // is pushed here.
+            context_push(parser, PM_CONTEXT_DEF_PARAMS);
+            parser_lex(parser);
+
+            // This will be false if the method name is not a valid identifier
+            // but could be followed by an operator.
+            bool valid_name = true;
+
+            switch (parser->current.type) {
+                case PM_CASE_OPERATOR:
+                    pm_parser_scope_push(parser, true);
+                    lex_state_set(parser, PM_LEX_STATE_ENDFN);
+                    parser_lex(parser);
+
+                    name = parser->previous;
+                    break;
+                case PM_TOKEN_IDENTIFIER: {
+                    parser_lex(parser);
+
+                    if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+                        receiver = parse_variable_call(parser);
+
+                        pm_parser_scope_push(parser, true);
+                        lex_state_set(parser, PM_LEX_STATE_FNAME);
+                        parser_lex(parser);
+
+                        operator = parser->previous;
+                        name = parse_method_definition_name(parser);
+                    } else {
+                        pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
+                        pm_parser_scope_push(parser, true);
+
+                        name = parser->previous;
+                    }
+
+                    break;
+                }
+                case PM_TOKEN_INSTANCE_VARIABLE:
+                case PM_TOKEN_CLASS_VARIABLE:
+                case PM_TOKEN_GLOBAL_VARIABLE:
+                    valid_name = false;
+                    PRISM_FALLTHROUGH
+                case PM_TOKEN_CONSTANT:
+                case PM_TOKEN_KEYWORD_NIL:
+                case PM_TOKEN_KEYWORD_SELF:
+                case PM_TOKEN_KEYWORD_TRUE:
+                case PM_TOKEN_KEYWORD_FALSE:
+                case PM_TOKEN_KEYWORD___FILE__:
+                case PM_TOKEN_KEYWORD___LINE__:
+                case PM_TOKEN_KEYWORD___ENCODING__: {
+                    pm_parser_scope_push(parser, true);
+                    parser_lex(parser);
+
+                    pm_token_t identifier = parser->previous;
+
+                    if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+                        lex_state_set(parser, PM_LEX_STATE_FNAME);
+                        parser_lex(parser);
+                        operator = parser->previous;
+
+                        switch (identifier.type) {
+                            case PM_TOKEN_CONSTANT:
+                                receiver = UP(pm_constant_read_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_INSTANCE_VARIABLE:
+                                receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_CLASS_VARIABLE:
+                                receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_GLOBAL_VARIABLE:
+                                receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD_NIL:
+                                receiver = UP(pm_nil_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD_SELF:
+                                receiver = UP(pm_self_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD_TRUE:
+                                receiver = UP(pm_true_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD_FALSE:
+                                receiver = UP(pm_false_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD___FILE__:
+                                receiver = UP(pm_source_file_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD___LINE__:
+                                receiver = UP(pm_source_line_node_create(parser, &identifier));
+                                break;
+                            case PM_TOKEN_KEYWORD___ENCODING__:
+                                receiver = UP(pm_source_encoding_node_create(parser, &identifier));
+                                break;
+                            default:
+                                break;
+                        }
+
+                        name = parse_method_definition_name(parser);
+                    } else {
+                        if (!valid_name) {
+                            PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
+                        }
+
+                        name = identifier;
+                    }
+                    break;
+                }
+                case PM_TOKEN_PARENTHESIS_LEFT: {
+                    // The current context is `PM_CONTEXT_DEF_PARAMS`, however
+                    // the inner expression of this parenthesis should not be
+                    // processed under this context. Thus, the context is popped
+                    // here.
+                    context_pop(parser);
+                    parser_lex(parser);
+
+                    pm_token_t lparen = parser->previous;
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
+
+                    accept1(parser, PM_TOKEN_NEWLINE);
+                    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+                    pm_token_t rparen = parser->previous;
+
+                    lex_state_set(parser, PM_LEX_STATE_FNAME);
+                    expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
+
+                    operator = parser->previous;
+                    receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
+
+                    // Push `PM_CONTEXT_DEF_PARAMS` again, for the same reason
+                    // as described above.
+                    pm_parser_scope_push(parser, true);
+                    context_push(parser, PM_CONTEXT_DEF_PARAMS);
+                    name = parse_method_definition_name(parser);
+                    break;
+                }
+                default:
+                    pm_parser_scope_push(parser, true);
+                    name = parse_method_definition_name(parser);
+                    break;
+            }
+
+            pm_token_t lparen;
+            pm_token_t rparen;
+            pm_parameters_node_t *params;
+
+            bool accept_endless_def = true;
+            switch (parser->current.type) {
+                case PM_TOKEN_PARENTHESIS_LEFT: {
+                    parser_lex(parser);
+                    lparen = parser->previous;
+
+                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                        params = NULL;
+                    } else {
+                        params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
+                    }
+
+                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                    parser->command_start = true;
+
+                    context_pop(parser);
+                    if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
+                        parser->previous.start = parser->previous.end;
+                        parser->previous.type = PM_TOKEN_MISSING;
+                    }
+
+                    rparen = parser->previous;
+                    break;
+                }
+                case PM_CASE_PARAMETER: {
+                    // If we're about to lex a label, we need to add the label
+                    // state to make sure the next newline is ignored.
+                    if (parser->current.type == PM_TOKEN_LABEL) {
+                        lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
+                    }
+
+                    lparen = not_provided(parser);
+                    rparen = not_provided(parser);
+                    params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
+
+                    // Reject `def * = 1` and similar. We have to specifically check
+                    // for them because they create ambiguity with optional arguments.
+                    accept_endless_def = false;
+
+                    context_pop(parser);
+                    break;
+                }
+                default: {
+                    lparen = not_provided(parser);
+                    rparen = not_provided(parser);
+                    params = NULL;
+
+                    context_pop(parser);
+                    break;
+                }
+            }
+
+            pm_node_t *statements = NULL;
+            pm_token_t equal;
+            pm_token_t end_keyword;
+
+            if (accept1(parser, PM_TOKEN_EQUAL)) {
+                if (token_is_setter_name(&name)) {
+                    pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
+                }
+                if (!accept_endless_def) {
+                    pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
+                }
+                if (
+                    parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
+                    parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
+                ) {
+                    PM_PARSER_ERR_FORMAT(parser, def_keyword.start, parser->previous.end, PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
+                }
+                equal = parser->previous;
+
+                context_push(parser, PM_CONTEXT_DEF);
+                pm_do_loop_stack_push(parser, false);
+                statements = UP(pm_statements_node_create(parser));
+
+                bool allow_command_call;
+                if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+                    allow_command_call = accepts_command_call;
+                } else {
+                    // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
+                    allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
+                }
+
+                pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
+
+                if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+                    context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
+                    pm_token_t rescue_keyword = parser->previous;
+                    pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+                    context_pop(parser);
+
+                    statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
+                }
+
+                pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
+                pm_do_loop_stack_pop(parser);
+                context_pop(parser);
+                end_keyword = not_provided(parser);
+            } else {
+                equal = not_provided(parser);
+
+                if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
+                    lex_state_set(parser, PM_LEX_STATE_BEG);
+                    parser->command_start = true;
+                    expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
+                } else {
+                    accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+                }
+
+                pm_accepts_block_stack_push(parser, true);
+                pm_do_loop_stack_push(parser, false);
+
+                if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                    pm_accepts_block_stack_push(parser, true);
+                    statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
+                    pm_accepts_block_stack_pop(parser);
+                }
+
+                if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+                    assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+                    statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
+                } else {
+                    parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
+                }
+
+                pm_accepts_block_stack_pop(parser);
+                pm_do_loop_stack_pop(parser);
+
+                expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
+                end_keyword = parser->previous;
+            }
+
+            pm_constant_id_list_t locals;
+            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+            pm_parser_scope_pop(parser);
+
+            /**
+             * If the final character is `@` as is the case when defining
+             * methods to override the unary operators, we should ignore
+             * the @ in the same way we do for symbols.
+             */
+            pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
+
+            flush_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return UP(pm_def_node_create(
+                parser,
+                name_id,
+                &name,
+                receiver,
+                params,
+                statements,
+                &locals,
+                &def_keyword,
+                &operator,
+                &lparen,
+                &rparen,
+                &equal,
+                &end_keyword
+            ));
+        }
+        case PM_TOKEN_KEYWORD_DEFINED: {
+            parser_lex(parser);
+            pm_token_t keyword = parser->previous;
+
+            pm_token_t lparen;
+            pm_token_t rparen;
+            pm_node_t *expression;
+
+            context_push(parser, PM_CONTEXT_DEFINED);
+            bool newline = accept1(parser, PM_TOKEN_NEWLINE);
+
+            if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+                lparen = parser->previous;
+
+                if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                    expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
+                    lparen = not_provided(parser);
+                    rparen = not_provided(parser);
+                } else {
+                    expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+
+                    if (parser->recovering) {
+                        rparen = not_provided(parser);
+                    } else {
+                        accept1(parser, PM_TOKEN_NEWLINE);
+                        expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+                        rparen = parser->previous;
+                    }
+                }
+            } else {
+                lparen = not_provided(parser);
+                rparen = not_provided(parser);
+                expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+            }
+
+            context_pop(parser);
+            return UP(pm_defined_node_create(
+                parser,
+                &lparen,
+                expression,
+                &rparen,
+                &keyword
+            ));
+        }
+        case PM_TOKEN_KEYWORD_END_UPCASE: {
+            if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
+            }
+
+            parser_lex(parser);
+            pm_token_t keyword = parser->previous;
+
+            if (context_def_p(parser)) {
+                pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
+            }
+
+            expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
+            pm_token_t opening = parser->previous;
+            pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
+
+            expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
+            return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
+        }
+        case PM_TOKEN_KEYWORD_FALSE:
+            parser_lex(parser);
+            return UP(pm_false_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD_FOR: {
+            size_t opening_newline_index = token_newline_index(parser);
+            parser_lex(parser);
+
+            pm_token_t for_keyword = parser->previous;
+            pm_node_t *index;
+
+            context_push(parser, PM_CONTEXT_FOR_INDEX);
+
+            // First, parse out the first index expression.
+            if (accept1(parser, PM_TOKEN_USTAR)) {
+                pm_token_t star_operator = parser->previous;
+                pm_node_t *name = NULL;
+
+                if (token_begins_expression_p(parser->current.type)) {
+                    name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+                }
+
+                index = UP(pm_splat_node_create(parser, &star_operator, name));
+            } else if (token_begins_expression_p(parser->current.type)) {
+                index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+            } else {
+                pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
+                index = UP(pm_missing_node_create(parser, for_keyword.start, for_keyword.end));
+            }
+
+            // Now, if there are multiple index expressions, parse them out.
+            if (match1(parser, PM_TOKEN_COMMA)) {
+                index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            } else {
+                index = parse_target(parser, index, false, false);
+            }
+
+            context_pop(parser);
+            pm_do_loop_stack_push(parser, true);
+
+            expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
+            pm_token_t in_keyword = parser->previous;
+
+            pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
+            pm_do_loop_stack_pop(parser);
+
+            pm_token_t do_keyword;
+            if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+                do_keyword = parser->previous;
+            } else {
+                do_keyword = not_provided(parser);
+                if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
+                }
+            }
+
+            pm_statements_node_t *statements = NULL;
+            if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+                statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
+            }
+
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
+
+            return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous));
+        }
+        case PM_TOKEN_KEYWORD_IF:
+            if (parser_end_of_line_p(parser)) {
+                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+            }
+
+            size_t opening_newline_index = token_newline_index(parser);
+            bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
+            parser_lex(parser);
+
+            return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
+        case PM_TOKEN_KEYWORD_UNDEF: {
+            if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
+            }
+
+            parser_lex(parser);
+            pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
+            pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
+
+            if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
+                pm_node_destroy(parser, name);
+            } else {
+                pm_undef_node_append(undef, name);
+
+                while (match1(parser, PM_TOKEN_COMMA)) {
+                    lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
+                    parser_lex(parser);
+                    name = parse_undef_argument(parser, (uint16_t) (depth + 1));
+
+                    if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
+                        pm_node_destroy(parser, name);
+                        break;
+                    }
+
+                    pm_undef_node_append(undef, name);
+                }
+            }
+
+            return UP(undef);
+        }
+        case PM_TOKEN_KEYWORD_NOT: {
+            parser_lex(parser);
+
+            pm_token_t message = parser->previous;
+            pm_arguments_t arguments = { 0 };
+            pm_node_t *receiver = NULL;
+
+            // If we do not accept a command call, then we also do not accept a
+            // not without parentheses. In this case we need to reject this
+            // syntax.
+            if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+                if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
+                    pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
+                } else {
+                    accept1(parser, PM_TOKEN_NEWLINE);
+                    pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
+                }
+
+                return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+            }
+
+            accept1(parser, PM_TOKEN_NEWLINE);
+
+            if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+                pm_token_t lparen = parser->previous;
+
+                if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                    receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
+                } else {
+                    arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
+                    receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+
+                    if (!parser->recovering) {
+                        accept1(parser, PM_TOKEN_NEWLINE);
+                        expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+                        arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+                    }
+                }
+            } else {
+                receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+            }
+
+            return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
+        }
+        case PM_TOKEN_KEYWORD_UNLESS: {
+            size_t opening_newline_index = token_newline_index(parser);
+            parser_lex(parser);
+
+            return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
+        }
+        case PM_TOKEN_KEYWORD_MODULE: {
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+            size_t opening_newline_index = token_newline_index(parser);
+            parser_lex(parser);
+            pm_token_t module_keyword = parser->previous;
+
+            pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
+            pm_token_t name;
+
+            // If we can recover from a syntax error that occurred while parsing
+            // the name of the module, then we'll handle that here.
+            if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
+                pop_block_exits(parser, previous_block_exits);
+                pm_node_list_free(&current_block_exits);
+
+                pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
+            }
+
+            while (accept1(parser, PM_TOKEN_COLON_COLON)) {
+                pm_token_t double_colon = parser->previous;
+
+                expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+                constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
+            }
+
+            // Here we retrieve the name of the module. If it wasn't a constant,
+            // then it's possible that `module foo` was passed, which is a
+            // syntax error. We handle that here as well.
+            name = parser->previous;
+            if (name.type != PM_TOKEN_CONSTANT) {
+                pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
+            }
+
+            pm_parser_scope_push(parser, true);
+            accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
+            pm_node_t *statements = NULL;
+
+            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                pm_accepts_block_stack_push(parser, true);
+                statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
+                pm_accepts_block_stack_pop(parser);
+            }
+
+            if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+                statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
+            } else {
+                parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
+            }
+
+            pm_constant_id_list_t locals;
+            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+            pm_parser_scope_pop(parser);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
+
+            if (context_def_p(parser)) {
+                pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
+            }
+
+            pop_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
+        }
+        case PM_TOKEN_KEYWORD_NIL:
+            parser_lex(parser);
+            return UP(pm_nil_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD_REDO: {
+            parser_lex(parser);
+
+            pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
+            if (!parser->partial_script) parse_block_exit(parser, node);
+
+            return node;
+        }
+        case PM_TOKEN_KEYWORD_RETRY: {
+            parser_lex(parser);
+
+            pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
+            parse_retry(parser, node);
+
+            return node;
+        }
+        case PM_TOKEN_KEYWORD_SELF:
+            parser_lex(parser);
+            return UP(pm_self_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD_TRUE:
+            parser_lex(parser);
+            return UP(pm_true_node_create(parser, &parser->previous));
+        case PM_TOKEN_KEYWORD_UNTIL: {
+            size_t opening_newline_index = token_newline_index(parser);
+
+            context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
+            pm_do_loop_stack_push(parser, true);
+
+            parser_lex(parser);
+            pm_token_t keyword = parser->previous;
+            pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+
+            pm_do_loop_stack_pop(parser);
+            context_pop(parser);
+
+            pm_token_t do_keyword;
+            if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+                do_keyword = parser->previous;
+            } else {
+                do_keyword = not_provided(parser);
+                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
+            }
+
+            pm_statements_node_t *statements = NULL;
+            if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+                pm_accepts_block_stack_push(parser, true);
+                statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
+                pm_accepts_block_stack_pop(parser);
+                accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+            }
+
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
+
+            return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
+        }
+        case PM_TOKEN_KEYWORD_WHILE: {
+            size_t opening_newline_index = token_newline_index(parser);
+
+            context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
+            pm_do_loop_stack_push(parser, true);
+
+            parser_lex(parser);
+            pm_token_t keyword = parser->previous;
+            pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+
+            pm_do_loop_stack_pop(parser);
+            context_pop(parser);
+
+            pm_token_t do_keyword;
+            if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
+                do_keyword = parser->previous;
+            } else {
+                do_keyword = not_provided(parser);
+                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
+            }
+
+            pm_statements_node_t *statements = NULL;
+            if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+                pm_accepts_block_stack_push(parser, true);
+                statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
+                pm_accepts_block_stack_pop(parser);
+                accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+            }
+
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
+            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
+
+            return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0));
+        }
+        case PM_TOKEN_PERCENT_LOWER_I: {
+            parser_lex(parser);
+            pm_token_t opening = parser->previous;
+            pm_array_node_t *array = pm_array_node_create(parser, &opening);
+            pm_node_t *current = NULL;
+
+            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+                accept1(parser, PM_TOKEN_WORDS_SEP);
+                if (match1(parser, PM_TOKEN_STRING_END)) break;
+
+                // Interpolation is not possible but nested heredocs can still lead to
+                // consecutive (disjoint) string tokens when the final newline is escaped.
+                while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                    pm_token_t opening = not_provided(parser);
+                    pm_token_t closing = not_provided(parser);
+
+                    // Record the string node, moving to interpolation if needed.
+                    if (current == NULL) {
+                        current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+                        parser_lex(parser);
+                    } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                        pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+                        parser_lex(parser);
+                        pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
+                    } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                        pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+                        pm_token_t bounds = not_provided(parser);
+
+                        pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
+                        pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
+                        pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
+                        parser_lex(parser);
+
+                        pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+                        pm_interpolated_symbol_node_append(interpolated, first_string);
+                        pm_interpolated_symbol_node_append(interpolated, second_string);
+
+                        xfree(current);
+                        current = UP(interpolated);
+                    } else {
+                        assert(false && "unreachable");
+                    }
+                }
+
+                if (current) {
+                    pm_array_node_elements_append(array, current);
+                    current = NULL;
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+                }
+            }
+
+            pm_token_t closing = parser->current;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
+                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+            } else {
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
+            }
+            pm_array_node_close_set(array, &closing);
+
+            return UP(array);
+        }
+        case PM_TOKEN_PERCENT_UPPER_I: {
+            parser_lex(parser);
+            pm_token_t opening = parser->previous;
+            pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+            // This is the current node that we are parsing that will be added to the
+            // list of elements.
+            pm_node_t *current = NULL;
+
+            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+                switch (parser->current.type) {
+                    case PM_TOKEN_WORDS_SEP: {
+                        if (current == NULL) {
+                            // If we hit a separator before we have any content, then we don't
+                            // need to do anything.
+                        } else {
+                            // If we hit a separator after we've hit content, then we need to
+                            // append that content to the list and reset the current node.
+                            pm_array_node_elements_append(array, current);
+                            current = NULL;
+                        }
+
+                        parser_lex(parser);
+                        break;
+                    }
+                    case PM_TOKEN_STRING_CONTENT: {
+                        pm_token_t opening = not_provided(parser);
+                        pm_token_t closing = not_provided(parser);
+
+                        if (current == NULL) {
+                            // If we hit content and the current node is NULL, then this is
+                            // the first string content we've seen. In that case we're going
+                            // to create a new symbol node and set that to the current.
+                            current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+                            parser_lex(parser);
+                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                            // If we hit string content and the current node is an
+                            // interpolated symbol, then we need to append the string content
+                            // to the list of child nodes.
+                            pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+                            parser_lex(parser);
+
+                            pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
+                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                            // If we hit string content and the current node is a symbol node,
+                            // then we need to convert the current node into an interpolated
+                            // symbol and add the string content to the list of child nodes.
+                            pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+                            pm_token_t bounds = not_provided(parser);
+
+                            pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
+                            pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped));
+                            pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing));
+                            parser_lex(parser);
+
+                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+                            pm_interpolated_symbol_node_append(interpolated, first_string);
+                            pm_interpolated_symbol_node_append(interpolated, second_string);
+
+                            xfree(current);
+                            current = UP(interpolated);
+                        } else {
+                            assert(false && "unreachable");
+                        }
+
+                        break;
+                    }
+                    case PM_TOKEN_EMBVAR: {
+                        bool start_location_set = false;
+                        if (current == NULL) {
+                            // If we hit an embedded variable and the current node is NULL,
+                            // then this is the start of a new element. We'll set the current
+                            // node to a new interpolated symbol.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
+                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                            // If we hit an embedded variable and the current node is a symbol
+                            // node, then we'll convert it into a string node and make it the
+                            // first part of a new interpolated symbol node.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+
+                            current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                            pm_interpolated_symbol_node_append(interpolated, current);
+                            interpolated->base.location.start = current->location.start;
+                            start_location_set = true;
+                            current = UP(interpolated);
+                        } else {
+                            // If we hit an embedded variable and the current node is an
+                            // interpolated string, then we'll just add the embedded variable.
+                        }
+
+                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                        pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
+                        if (!start_location_set) {
+                            current->location.start = part->location.start;
+                        }
+                        break;
+                    }
+                    case PM_TOKEN_EMBEXPR_BEGIN: {
+                        bool start_location_set = false;
+                        if (current == NULL) {
+                            // If we hit an embedded expression and the current node is NULL,
+                            // then this is the start of a new symbol. We'll set the current
+                            // node to a new interpolated symbol.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            current = UP(pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing));
+                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                            // If we hit an embedded expression and the current node is a
+                            // symbol node, then we'll convert the current into an
+                            // interpolated symbol and add it (as a string node) to the
+                            // list of parts.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+
+                            current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                            pm_interpolated_symbol_node_append(interpolated, current);
+                            interpolated->base.location.start = current->location.start;
+                            start_location_set = true;
+                            current = UP(interpolated);
+                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                            // If we hit an embedded expression and the current node is an
+                            // interpolated symbol, then we'll just continue on.
+                        } else {
+                            assert(false && "unreachable");
+                        }
+
+                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                        pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
+                        if (!start_location_set) {
+                            current->location.start = part->location.start;
+                        }
+                        break;
+                    }
+                    default:
+                        expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
+                        parser_lex(parser);
+                        break;
+                }
+            }
+
+            // If we have a current node, then we need to append it to the list.
+            if (current) {
+                pm_array_node_elements_append(array, current);
+            }
+
+            pm_token_t closing = parser->current;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
+                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+            } else {
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+            }
+            pm_array_node_close_set(array, &closing);
+
+            return UP(array);
+        }
+        case PM_TOKEN_PERCENT_LOWER_W: {
+            parser_lex(parser);
+            pm_token_t opening = parser->previous;
+            pm_array_node_t *array = pm_array_node_create(parser, &opening);
+            pm_node_t *current = NULL;
+
+            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+                accept1(parser, PM_TOKEN_WORDS_SEP);
+                if (match1(parser, PM_TOKEN_STRING_END)) break;
+
+                // Interpolation is not possible but nested heredocs can still lead to
+                // consecutive (disjoint) string tokens when the final newline is escaped.
+                while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                    pm_token_t opening = not_provided(parser);
+                    pm_token_t closing = not_provided(parser);
+
+                    pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+
+                    // Record the string node, moving to interpolation if needed.
+                    if (current == NULL) {
+                        current = string;
+                    } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                        pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
+                    } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                        pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+                        pm_interpolated_string_node_append(interpolated, current);
+                        pm_interpolated_string_node_append(interpolated, string);
+                        current = UP(interpolated);
+                    } else {
+                        assert(false && "unreachable");
+                    }
+                    parser_lex(parser);
+                }
+
+                if (current) {
+                    pm_array_node_elements_append(array, current);
+                    current = NULL;
+                } else {
+                    expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                }
+            }
+
+            pm_token_t closing = parser->current;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
+                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+            } else {
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
+            }
+
+            pm_array_node_close_set(array, &closing);
+            return UP(array);
+        }
+        case PM_TOKEN_PERCENT_UPPER_W: {
+            parser_lex(parser);
+            pm_token_t opening = parser->previous;
+            pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+            // This is the current node that we are parsing that will be added
+            // to the list of elements.
+            pm_node_t *current = NULL;
+
+            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+                switch (parser->current.type) {
+                    case PM_TOKEN_WORDS_SEP: {
+                        // Reset the explicit encoding if we hit a separator
+                        // since each element can have its own encoding.
+                        parser->explicit_encoding = NULL;
+
+                        if (current == NULL) {
+                            // If we hit a separator before we have any content,
+                            // then we don't need to do anything.
+                        } else {
+                            // If we hit a separator after we've hit content,
+                            // then we need to append that content to the list
+                            // and reset the current node.
+                            pm_array_node_elements_append(array, current);
+                            current = NULL;
+                        }
+
+                        parser_lex(parser);
+                        break;
+                    }
+                    case PM_TOKEN_STRING_CONTENT: {
+                        pm_token_t opening = not_provided(parser);
+                        pm_token_t closing = not_provided(parser);
+
+                        pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing));
+                        pm_node_flag_set(string, parse_unescaped_encoding(parser));
+                        parser_lex(parser);
+
+                        if (current == NULL) {
+                            // If we hit content and the current node is NULL,
+                            // then this is the first string content we've seen.
+                            // In that case we're going to create a new string
+                            // node and set that to the current.
+                            current = string;
+                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                            // If we hit string content and the current node is
+                            // an interpolated string, then we need to append
+                            // the string content to the list of child nodes.
+                            pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
+                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                            // If we hit string content and the current node is
+                            // a string node, then we need to convert the
+                            // current node into an interpolated string and add
+                            // the string content to the list of child nodes.
+                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+                            pm_interpolated_string_node_append(interpolated, current);
+                            pm_interpolated_string_node_append(interpolated, string);
+                            current = UP(interpolated);
+                        } else {
+                            assert(false && "unreachable");
+                        }
+
+                        break;
+                    }
+                    case PM_TOKEN_EMBVAR: {
+                        if (current == NULL) {
+                            // If we hit an embedded variable and the current
+                            // node is NULL, then this is the start of a new
+                            // string. We'll set the current node to a new
+                            // interpolated string.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
+                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                            // If we hit an embedded variable and the current
+                            // node is a string node, then we'll convert the
+                            // current into an interpolated string and add the
+                            // string node to the list of parts.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+                            pm_interpolated_string_node_append(interpolated, current);
+                            current = UP(interpolated);
+                        } else {
+                            // If we hit an embedded variable and the current
+                            // node is an interpolated string, then we'll just
+                            // add the embedded variable.
+                        }
+
+                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                        pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
+                        break;
+                    }
+                    case PM_TOKEN_EMBEXPR_BEGIN: {
+                        if (current == NULL) {
+                            // If we hit an embedded expression and the current
+                            // node is NULL, then this is the start of a new
+                            // string. We'll set the current node to a new
+                            // interpolated string.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            current = UP(pm_interpolated_string_node_create(parser, &opening, NULL, &closing));
+                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                            // If we hit an embedded expression and the current
+                            // node is a string node, then we'll convert the
+                            // current into an interpolated string and add the
+                            // string node to the list of parts.
+                            pm_token_t opening = not_provided(parser);
+                            pm_token_t closing = not_provided(parser);
+                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+                            pm_interpolated_string_node_append(interpolated, current);
+                            current = UP(interpolated);
+                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                            // If we hit an embedded expression and the current
+                            // node is an interpolated string, then we'll just
+                            // continue on.
+                        } else {
+                            assert(false && "unreachable");
+                        }
+
+                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                        pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
+                        break;
+                    }
+                    default:
+                        expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
+                        parser_lex(parser);
+                        break;
+                }
+            }
+
+            // If we have a current node, then we need to append it to the list.
+            if (current) {
+                pm_array_node_elements_append(array, current);
+            }
+
+            pm_token_t closing = parser->current;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
+                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+            } else {
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
+            }
+
+            pm_array_node_close_set(array, &closing);
+            return UP(array);
+        }
+        case PM_TOKEN_REGEXP_BEGIN: {
+            pm_token_t opening = parser->current;
+            parser_lex(parser);
+
+            if (match1(parser, PM_TOKEN_REGEXP_END)) {
+                // If we get here, then we have an end immediately after a start. In
+                // that case we'll create an empty content token and return an
+                // uninterpolated regular expression.
+                pm_token_t content = (pm_token_t) {
+                    .type = PM_TOKEN_STRING_CONTENT,
+                    .start = parser->previous.end,
+                    .end = parser->previous.end
+                };
+
+                parser_lex(parser);
+
+                pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->previous));
+                pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
+
+                return node;
+            }
+
+            pm_interpolated_regular_expression_node_t *interpolated;
+
+            if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                // In this case we've hit string content so we know the regular
+                // expression at least has something in it. We'll need to check if the
+                // following token is the end (in which case we can return a plain
+                // regular expression) or if it's not then it has interpolation.
+                pm_string_t unescaped = parser->current_string;
+                pm_token_t content = parser->current;
+                bool ascii_only = parser->current_regular_expression_ascii_only;
+                parser_lex(parser);
+
+                // If we hit an end, then we can create a regular expression
+                // node without interpolation, which can be represented more
+                // succinctly and more easily compiled.
+                if (accept1(parser, PM_TOKEN_REGEXP_END)) {
+                    pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+
+                    // If we're not immediately followed by a =~, then we want
+                    // to parse all of the errors at this point. If it is
+                    // followed by a =~, then it will get parsed higher up while
+                    // parsing the named captures as well.
+                    if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
+                        parse_regular_expression_errors(parser, node);
+                    }
+
+                    pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
+                    return UP(node);
+                }
+
+                // If we get here, then we have interpolation so we'll need to create
+                // a regular expression node with interpolation.
+                interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
+
+                pm_token_t opening = not_provided(parser);
+                pm_token_t closing = not_provided(parser);
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
+
+                if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+                    // This is extremely strange, but the first string part of a
+                    // regular expression will always be tagged as binary if we
+                    // are in a US-ASCII file, no matter its contents.
+                    pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
+                }
+
+                pm_interpolated_regular_expression_node_append(interpolated, part);
+            } else {
+                // If the first part of the body of the regular expression is not a
+                // string content, then we have interpolation and we need to create an
+                // interpolated regular expression node.
+                interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
+            }
+
+            // Now that we're here and we have interpolation, we'll parse all of the
+            // parts into the list.
+            pm_node_t *part;
+            while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
+                if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+                    pm_interpolated_regular_expression_node_append(interpolated, part);
+                }
+            }
+
+            pm_token_t closing = parser->current;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
+                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+            } else {
+                expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
+            }
+
+            pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
+            return UP(interpolated);
+        }
+        case PM_TOKEN_BACKTICK:
+        case PM_TOKEN_PERCENT_LOWER_X: {
+            parser_lex(parser);
+            pm_token_t opening = parser->previous;
+
+            // When we get here, we don't know if this string is going to have
+            // interpolation or not, even though it is allowed. Still, we want to be
+            // able to return a string node without interpolation if we can since
+            // it'll be faster.
+            if (match1(parser, PM_TOKEN_STRING_END)) {
+                // If we get here, then we have an end immediately after a start. In
+                // that case we'll create an empty content token and return an
+                // uninterpolated string.
+                pm_token_t content = (pm_token_t) {
+                    .type = PM_TOKEN_STRING_CONTENT,
+                    .start = parser->previous.end,
+                    .end = parser->previous.end
+                };
+
+                parser_lex(parser);
+                return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
+            }
+
+            pm_interpolated_x_string_node_t *node;
+
+            if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                // In this case we've hit string content so we know the string
+                // at least has something in it. We'll need to check if the
+                // following token is the end (in which case we can return a
+                // plain string) or if it's not then it has interpolation.
+                pm_string_t unescaped = parser->current_string;
+                pm_token_t content = parser->current;
+                parser_lex(parser);
+
+                if (match1(parser, PM_TOKEN_STRING_END)) {
+                    pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
+                    pm_node_flag_set(node, parse_unescaped_encoding(parser));
+                    parser_lex(parser);
+                    return node;
+                }
+
+                // If we get here, then we have interpolation so we'll need to
+                // create an xstring node with interpolation.
+                node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
+
+                pm_token_t opening = not_provided(parser);
+                pm_token_t closing = not_provided(parser);
+
+                pm_node_t *part = UP(pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped));
+                pm_node_flag_set(part, parse_unescaped_encoding(parser));
+
+                pm_interpolated_xstring_node_append(node, part);
+            } else {
+                // If the first part of the body of the string is not a string
+                // content, then we have interpolation and we need to create an
+                // interpolated xstring node.
+                node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
+            }
+
+            pm_node_t *part;
+            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+                if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
+                    pm_interpolated_xstring_node_append(node, part);
+                }
+            }
+
+            pm_token_t closing = parser->current;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
+                closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+            } else {
+                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
+            }
+            pm_interpolated_xstring_node_closing_set(node, &closing);
+
+            return UP(node);
+        }
+        case PM_TOKEN_USTAR: {
+            parser_lex(parser);
+
+            // * operators at the beginning of expressions are only valid in the
+            // context of a multiple assignment. We enforce that here. We'll
+            // still lex past it though and create a missing node in its place.
+            if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                pm_parser_err_prefix(parser, diag_id);
+                return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
+            }
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *name = NULL;
+
+            if (token_begins_expression_p(parser->current.type)) {
+                name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+            }
+
+            pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
+
+            if (match1(parser, PM_TOKEN_COMMA)) {
+                return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            } else {
+                return parse_target_validate(parser, splat, true);
+            }
+        }
+        case PM_TOKEN_BANG: {
+            if (binding_power > PM_BINDING_POWER_UNARY) {
+                pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+            }
+
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
+
+            pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
+            return UP(node);
+        }
+        case PM_TOKEN_TILDE: {
+            if (binding_power > PM_BINDING_POWER_UNARY) {
+                pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+            }
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
+
+            return UP(node);
+        }
+        case PM_TOKEN_UMINUS: {
+            if (binding_power > PM_BINDING_POWER_UNARY) {
+                pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+            }
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
+
+            return UP(node);
+        }
+        case PM_TOKEN_UMINUS_NUM: {
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+
+            if (accept1(parser, PM_TOKEN_STAR_STAR)) {
+                pm_token_t exponent_operator = parser->previous;
+                pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+                node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
+                node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
+            } else {
+                switch (PM_NODE_TYPE(node)) {
+                    case PM_INTEGER_NODE:
+                    case PM_FLOAT_NODE:
+                    case PM_RATIONAL_NODE:
+                    case PM_IMAGINARY_NODE:
+                        parse_negative_numeric(node);
+                        break;
+                    default:
+                        node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
+                        break;
+                }
+            }
+
+            return node;
+        }
+        case PM_TOKEN_MINUS_GREATER: {
+            int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
+            parser->lambda_enclosure_nesting = parser->enclosure_nesting;
+
+            size_t opening_newline_index = token_newline_index(parser);
+            pm_accepts_block_stack_push(parser, true);
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_parser_scope_push(parser, false);
+
+            pm_block_parameters_node_t *block_parameters;
+
+            switch (parser->current.type) {
+                case PM_TOKEN_PARENTHESIS_LEFT: {
+                    pm_token_t opening = parser->current;
+                    parser_lex(parser);
+
+                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                        block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
+                    } else {
+                        block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
+                    }
+
+                    accept1(parser, PM_TOKEN_NEWLINE);
+                    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+                    pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+                    break;
+                }
+                case PM_CASE_PARAMETER: {
+                    pm_accepts_block_stack_push(parser, false);
+                    pm_token_t opening = not_provided(parser);
+                    block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
+                    pm_accepts_block_stack_pop(parser);
+                    break;
+                }
+                default: {
+                    block_parameters = NULL;
+                    break;
+                }
+            }
+
+            pm_token_t opening;
+            pm_node_t *body = NULL;
+            parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
+
+            if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
+                opening = parser->previous;
+
+                if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
+                    body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
+                }
+
+                parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
+                expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
+            } else {
+                expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
+                opening = parser->previous;
+
+                if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+                    pm_accepts_block_stack_push(parser, true);
+                    body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
+                    pm_accepts_block_stack_pop(parser);
+                }
+
+                if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+                    assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
+                    body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
+                } else {
+                    parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
+                }
+
+                expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
+            }
+
+            pm_constant_id_list_t locals;
+            pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
+            pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
+
+            pm_parser_scope_pop(parser);
+            pm_accepts_block_stack_pop(parser);
+
+            return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
+        }
+        case PM_TOKEN_UPLUS: {
+            if (binding_power > PM_BINDING_POWER_UNARY) {
+                pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
+            }
+            parser_lex(parser);
+
+            pm_token_t operator = parser->previous;
+            pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+            pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
+
+            return UP(node);
+        }
+        case PM_TOKEN_STRING_BEGIN:
+            return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
+        case PM_TOKEN_SYMBOL_BEGIN: {
+            pm_lex_mode_t lex_mode = *parser->lex_modes.current;
+            parser_lex(parser);
+
+            return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
+        }
+        default: {
+            pm_context_t recoverable = context_recoverable(parser, &parser->current);
+
+            if (recoverable != PM_CONTEXT_NONE) {
+                parser->recovering = true;
+
+                // If the given error is not the generic one, then we'll add it
+                // here because it will provide more context in addition to the
+                // recoverable error that we will also add.
+                if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
+                    pm_parser_err_prefix(parser, diag_id);
+                }
+
+                // If we get here, then we are assuming this token is closing a
+                // parent context, so we'll indicate that to the user so that
+                // they know how we behaved.
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+            } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
+                // We're going to make a special case here, because "cannot
+                // parse expression" is pretty generic, and we know here that we
+                // have an unexpected token.
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+            } else {
+                pm_parser_err_prefix(parser, diag_id);
+            }
+
+            return UP(pm_missing_node_create(parser, parser->previous.start, parser->previous.end));
+        }
+    }
+}
+
+/**
+ * Parse the right-hand side of an operator write (&&=, ||=, or one of the
+ * binary operator assignments) whose target is a variable or method call.
+ *
+ * This needs its own entry point because a trailing `rescue` modifier may
+ * attach to the value here, which deviates from the modifier's usual binding
+ * power. When one follows, the value is wrapped in a rescue modifier node;
+ * otherwise the parsed value is returned as-is.
+ */
+static pm_node_t *
+parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    // Directly under an assignment the caller's flag decides whether command
+    // calls are accepted; otherwise it depends on the previous binding power.
+    bool accepts_command = previous_binding_power < PM_BINDING_POWER_MATCH;
+    if (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT) accepts_command = accepts_command_call;
+
+    pm_node_t *left = parse_value_expression(parser, binding_power, accepts_command, false, diag_id, (uint16_t) (depth + 1));
+    if (!match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) return left;
+
+    // Consume the `rescue` keyword, then parse the fallback expression while
+    // the rescue modifier context is active.
+    context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+    pm_token_t keyword = parser->current;
+    parser_lex(parser);
+
+    pm_node_t *fallback = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+    context_pop(parser);
+    return UP(pm_rescue_modifier_node_create(parser, left, &keyword, fallback));
+}
+
+/**
+ * Mark local variables as "used" when a write to them appears as the value of
+ * another write, looking through begin, parentheses, and statements nodes.
+ */
+static void
+parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_LOCAL_VARIABLE_WRITE_NODE: {
+            // The nested write itself counts as a read of its target.
+            const pm_local_variable_write_node_t *write = (const pm_local_variable_write_node_t *) node;
+            pm_locals_read(&pm_parser_scope_find(parser, write->depth)->locals, write->name);
+            return;
+        }
+        case PM_STATEMENTS_NODE: {
+            const pm_statements_node_t *statements = (const pm_statements_node_t *) node;
+            const pm_node_t *child;
+            PM_NODE_LIST_FOREACH(&statements->body, position, child) {
+                parse_assignment_value_local(parser, child);
+            }
+            return;
+        }
+        case PM_PARENTHESES_NODE: {
+            const pm_parentheses_node_t *parentheses = (const pm_parentheses_node_t *) node;
+            if (parentheses->body != NULL) parse_assignment_value_local(parser, parentheses->body);
+            return;
+        }
+        case PM_BEGIN_NODE: {
+            const pm_begin_node_t *begin = (const pm_begin_node_t *) node;
+            if (begin->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) begin->statements);
+            return;
+        }
+        default:
+            return;
+    }
+}
+
+/**
+ * Parse the value (or values, through an implicit array) that is going to be
+ * written to some kind of variable or method call. We need to handle this
+ * separately because the rescue modifier is permitted on the end of these
+ * expressions, which is a deviation from its normal binding power.
+ *
+ * Additionally, if the value is a local variable write node (e.g., a = a = 1),
+ * the "a" is marked as being used so the parser should not warn on it.
+ *
+ * Note that this will only be called after an = operator, as that is the only
+ * operator that allows multiple values after it.
+ */
+static pm_node_t *
+parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    bool permitted = true;
+    if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false; // a leading splat is only accepted at the statement level
+
+    pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
+    if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE); // the value is still parsed and returned; only an error is recorded
+
+    parse_assignment_value_local(parser, value);
+    bool single_value = true;
+
+    if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
+        single_value = false; // a splat or a following comma turns the values into an implicit array
+
+        pm_token_t opening = not_provided(parser);
+        pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+        pm_array_node_elements_append(array, value);
+        value = UP(array);
+
+        while (accept1(parser, PM_TOKEN_COMMA)) {
+            pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
+
+            pm_array_node_elements_append(array, element);
+            if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break; // stop once an element is a missing node
+
+            parse_assignment_value_local(parser, element);
+        }
+    }
+
+    // Contrary to its usual binding power, the `rescue` modifier may follow the
+    // value; for an implicit array only with the multi-assignment binding power.
+    if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+        context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
+        pm_token_t rescue = parser->current;
+        parser_lex(parser);
+
+        bool accepts_command_call_inner = false;
+
+        // The rescue value can accept a command call iff the written value is a
+        // call with arguments but without parentheses.
+        if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
+            pm_call_node_t *call_node = (pm_call_node_t *) value;
+            if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
+                accepts_command_call_inner = true;
+            }
+        }
+
+        pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+        context_pop(parser);
+
+        return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
+    }
+
+    return value;
+}
+
+/**
+ * Strip the arguments and block from a call node that is about to become a
+ * call operator write, reporting an error against the operator for each one
+ * that was present. The offending child nodes are destroyed, not preserved.
+ */
+static void
+parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
+    if (call_node->arguments != NULL) {
+        pm_node_t *arguments = UP(call_node->arguments);
+        pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
+        pm_node_unreference(parser, arguments);
+        pm_node_destroy(parser, arguments);
+        call_node->arguments = NULL;
+    }
+
+    if (call_node->block != NULL) {
+        pm_node_t *block = UP(call_node->block);
+        pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
+        pm_node_unreference(parser, block);
+        pm_node_destroy(parser, block);
+        call_node->block = NULL;
+    }
+}
+
+/**
+ * State shared between the code that starts the regular expression parser and
+ * the named capture callback that it invokes.
+ */
+typedef struct {
+    /** The parser that is parsing the regular expression. */
+    pm_parser_t *parser;
+
+    /** The call node wrapping the regular expression node. */
+    pm_call_node_t *call;
+
+    /** The match write node, created lazily by the callback (NULL until then). */
+    pm_match_write_node_t *match;
+
+    /** The capture names seen so far, used to skip duplicates. */
+    pm_constant_id_list_t names;
+
+    /**
+     * Whether the content of the regular expression is shared. This impacts
+     * whether we use owned constants or shared constants in the constant pool
+     * for the names of the captures.
+     */
+    bool shared;
+} parse_regular_expression_named_capture_data_t;
+
+/** Decode a \x escape of one or two hexadecimal digits; cursor is on the 'x'. */
+static inline const uint8_t *
+pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+    const uint8_t *digit = cursor + 1;
+
+    // With no hexadecimal digit after the 'x', the escape is emitted verbatim.
+    if (digit >= end || !pm_char_is_hexadecimal_digit(*digit)) {
+        pm_buffer_append_string(unescaped, "\\x", 2);
+        return digit;
+    }
+
+    uint8_t byte = escape_hexadecimal_digit(*digit);
+    digit++;
+    if (digit < end && pm_char_is_hexadecimal_digit(*digit)) {
+        byte = (uint8_t) ((byte << 4) | escape_hexadecimal_digit(*digit));
+        digit++;
+    }
+    pm_buffer_append_byte(unescaped, byte);
+    return digit;
+}
+
+/**
+ * Decode an octal escape of up to three digits; cursor is on the first digit.
+ */
+static inline const uint8_t *
+pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+    uint8_t byte = (uint8_t) (*cursor - '0');
+    cursor++;
+
+    // Fold in at most two more digits, keeping only the low eight bits.
+    for (int extra = 0; extra < 2; extra++) {
+        if (cursor >= end || !pm_char_is_octal_digit(*cursor)) break;
+        byte = (uint8_t) (((uint8_t) (byte << 3)) | ((uint8_t) (*cursor - '0')));
+        cursor++;
+    }
+
+    pm_buffer_append_byte(unescaped, byte);
+    return cursor;
+}
+
+static inline const uint8_t *
+pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
+    const uint8_t *start = cursor - 1; // byte before cursor (presumably the backslash); used to copy the raw escape on failure
+    cursor++; // step past the byte at cursor (the 'u' when reached from pm_named_capture_escape)
+
+    if (cursor >= end) { // nothing follows the escape, so it is emitted literally
+        pm_buffer_append_string(unescaped, "\\u", 2);
+        return cursor;
+    }
+
+    if (*cursor != '{') { // unbraced form: at most four hexadecimal digits
+        size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
+        uint32_t value = escape_unicode(parser, cursor, length, error_location);
+
+        if (!pm_buffer_append_unicode_codepoint(unescaped, value)) { // codepoint could not be appended: keep the raw escape text instead
+            pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
+        }
+
+        return cursor + length;
+    }
+
+    cursor++; // braced form: skip the '{' and read a space-separated list of codepoints
+    for (;;) {
+        while (cursor < end && *cursor == ' ') cursor++; // skip spaces between codepoints
+
+        if (cursor >= end) break; // unterminated list: stop at the end of the input
+        if (*cursor == '}') {
+            cursor++;
+            break;
+        }
+
+        size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
+        if (length == 0) { // not a hexadecimal digit: stop without consuming it
+            break;
+        }
+        uint32_t value = escape_unicode(parser, cursor, length, error_location);
+
+        (void) pm_buffer_append_unicode_codepoint(unescaped, value); // append result is deliberately ignored here
+        cursor += length;
+    }
+
+    return cursor; // first byte after the consumed escape
+}
+
+static void
+pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
+    const uint8_t *end = source + length;
+    pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source)); // copy the text before cursor, which is expected to be on the first backslash
+
+    for (;;) {
+        if (++cursor >= end) { // a backslash that ends the input is kept as-is
+            pm_buffer_append_byte(unescaped, '\\');
+            return;
+        }
+
+        switch (*cursor) { // each helper appends the decoded bytes and returns the position after the escape
+            case 'x':
+                cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
+                break;
+            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+                cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
+                break;
+            case 'u':
+                cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
+                break;
+            default:
+                pm_buffer_append_byte(unescaped, '\\'); // other escapes keep their backslash; the escaped byte itself is copied below
+                break;
+        }
+
+        const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
+        if (next_cursor == NULL) break; // no further escapes; the remainder is copied after the loop
+
+        pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor)); // literal text up to the next backslash
+        cursor = next_cursor;
+    }
+
+    pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
+}
+
+/**
+ * Callback invoked by the regular expression parser for each named capture
+ * group. data is a parse_regular_expression_named_capture_data_t.
+ */
+static void
+parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
+    parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
+
+    pm_parser_t *parser = callback_data->parser;
+    pm_call_node_t *call = callback_data->call;
+    pm_constant_id_list_t *names = &callback_data->names;
+
+    const uint8_t *source = pm_string_source(capture);
+    size_t length = pm_string_length(capture);
+    pm_buffer_t unescaped = { 0 }; // only filled when the name contains a backslash; freed on every exit path
+
+    // First, we need to handle escapes within the name of the capture group.
+    // This is because regular expressions have three different representations
+    // in prism. The first is the plain source code. The second is the
+    // representation that will be sent to the regular expression engine, which
+    // is the value of the "unescaped" field. This is poorly named, because it
+    // actually still contains escapes, just a subset of them that the regular
+    // expression engine knows how to handle. The third representation is fully
+    // unescaped, which is what we need.
+    const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
+    if (PRISM_UNLIKELY(cursor != NULL)) {
+        pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->shared ? NULL : &call->receiver->location);
+        source = (const uint8_t *) pm_buffer_value(&unescaped); // from here on the name is read from the unescaped buffer
+        length = pm_buffer_length(&unescaped);
+    }
+
+    pm_location_t location;
+    pm_constant_id_t name;
+
+    // If the name of the capture group isn't a valid identifier, we do
+    // not add it to the local table.
+    if (!pm_slice_is_valid_local(parser, source, source + length)) {
+        pm_buffer_free(&unescaped);
+        return;
+    }
+
+    if (callback_data->shared) {
+        // If the unescaped string is a slice of the source, then we can
+        // copy the names directly. The pointers will line up.
+        location = (pm_location_t) { .start = source, .end = source + length }; // NOTE(review): source points into 'unescaped' (freed below) if escapes were rewritten above - confirm that cannot coincide with shared content
+        name = pm_parser_constant_id_location(parser, location.start, location.end);
+    } else {
+        // Otherwise, the name is a slice of the malloc-ed owned string,
+        // in which case we need to copy it out into a new string.
+        location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
+
+        void *memory = xmalloc(length);
+        if (memory == NULL) abort();
+
+        memcpy(memory, source, length);
+        name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length); // the copy is handed to the constant pool as an owned constant
+    }
+
+    // Only handle the name if it has a nonzero constant id and has not already
+    // been seen for this regular expression (keywords are filtered below).
+    if (name != 0 && !pm_constant_id_list_includes(names, name)) {
+        pm_constant_id_list_append(names, name);
+
+        int depth;
+        if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
+            // If the local is not already a local but it is a keyword, then we
+            // do not want to add a capture for this.
+            if (pm_local_is_keyword((const char *) source, length)) {
+                pm_buffer_free(&unescaped);
+                return;
+            }
+
+            // If the identifier is not already a local, then we will add it to
+            // the local table.
+            pm_parser_local_add(parser, name, location.start, location.end, 0);
+        }
+
+        // Here we lazily create the MatchWriteNode since we know we're
+        // about to add a target.
+        if (callback_data->match == NULL) {
+            callback_data->match = pm_match_write_node_create(parser, call);
+        }
+
+        // Next, create the local variable target and add it to the list of
+        // targets for the match.
+        pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth)); // a local added just above gets depth 0
+        pm_node_list_append(&callback_data->match->targets, target);
+    }
+
+    pm_buffer_free(&unescaped);
+}
+
+/**
+ * Run the named capture scan over the regular expression on the receiving side
+ * of a =~ call. The match write node is returned if the scan produced one;
+ * otherwise the original call node is handed back.
+ */
+static pm_node_t *
+parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
+    const bool shared = (content->type == PM_STRING_SHARED);
+    const uint8_t *pattern = pm_string_source(content);
+    size_t pattern_length = pm_string_length(content);
+
+    // State handed to the named capture callback; the match write node is only
+    // created by the callback once a capture yields a target.
+    parse_regular_expression_named_capture_data_t capture_data = {
+        .parser = parser,
+        .call = call,
+        .match = NULL,
+        .names = { 0 },
+        .shared = shared
+    };
+
+    // State handed to the error callback, pointing at the receiver's location.
+    parse_regular_expression_error_data_t error_data = { .parser = parser, .start = call->receiver->location.start, .end = call->receiver->location.end, .shared = shared };
+
+    pm_regexp_parse(parser, pattern, pattern_length, extended_mode, parse_regular_expression_named_capture, &capture_data, parse_regular_expression_error, &error_data);
+    pm_constant_id_list_free(&capture_data.names);
+
+    return (capture_data.match != NULL) ? UP(capture_data.match) : UP(call);
+}
+
+static inline pm_node_t *
+parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
+    pm_token_t token = parser->current;
+
+    switch (token.type) {
+        case PM_TOKEN_EQUAL: {
+            switch (PM_NODE_TYPE(node)) {
+                case PM_CALL_NODE: {
+                    // If we have no arguments to the call node and we need this
+                    // to be a target then this is either a method call or a
+                    // local variable write. This _must_ happen before the value
+                    // is parsed because it could be referenced in the value.
+                    pm_call_node_t *call_node = (pm_call_node_t *) node;
+                    if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
+                        pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
+                    }
+                }
+                PRISM_FALLTHROUGH
+                case PM_CASE_WRITABLE: {
+                    // When we have `it = value`, we need to add `it` as a local
+                    // variable before parsing the value, in case the value
+                    // references the variable.
+                    if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+                        pm_parser_local_add_location(parser, node->location.start, node->location.end, 0);
+                    }
+
+                    parser_lex(parser);
+                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+
+                    if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
+                        pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
+                    }
+
+                    return parse_write(parser, node, &token, value);
+                }
+                case PM_SPLAT_NODE: {
+                    pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+                    pm_multi_target_node_targets_append(parser, multi_target, node);
+
+                    parser_lex(parser);
+                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+                    return parse_write(parser, UP(multi_target), &token, value);
+                }
+                case PM_SOURCE_ENCODING_NODE:
+                case PM_FALSE_NODE:
+                case PM_SOURCE_FILE_NODE:
+                case PM_SOURCE_LINE_NODE:
+                case PM_NIL_NODE:
+                case PM_SELF_NODE:
+                case PM_TRUE_NODE: {
+                    // In these special cases, we have specific error messages
+                    // and we will replace them with local variable writes.
+                    parser_lex(parser);
+                    pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+                    return parse_unwriteable_write(parser, node, &token, value);
+                }
+                default:
+                    // In this case we have an = sign, but we don't know what
+                    // it's for. We need to treat it as an error. We'll mark it
+                    // as an error and skip past it.
+                    parser_lex(parser);
+                    pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
+                    return node;
+            }
+        }
+        case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
+            switch (PM_NODE_TYPE(node)) {
+                case PM_BACK_REFERENCE_READ_NODE:
+                case PM_NUMBERED_REFERENCE_READ_NODE:
+                    PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
+                PRISM_FALLTHROUGH
+                case PM_GLOBAL_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CLASS_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CONSTANT_PATH_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
+
+                    return parse_shareable_constant_write(parser, write);
+                }
+                case PM_CONSTANT_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return parse_shareable_constant_write(parser, write);
+                }
+                case PM_INSTANCE_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+                    pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
+
+                    pm_node_unreference(parser, node);
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_LOCAL_VARIABLE_READ_NODE: {
+                    if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
+                        pm_node_unreference(parser, node);
+                    }
+
+                    pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CALL_NODE: {
+                    pm_call_node_t *cast = (pm_call_node_t *) node;
+
+                    // If we have a vcall (a method with no arguments and no
+                    // receiver that could have been a local variable) then we
+                    // will transform it into a local variable write.
+                    if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
+                        pm_location_t *message_loc = &cast->message_loc;
+                        pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
+
+                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+                        parser_lex(parser);
+
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                        pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
+
+                        pm_node_destroy(parser, UP(cast));
+                        return result;
+                    }
+
+                    // Move past the token here so that we have already added
+                    // the local variable by this point.
+                    parser_lex(parser);
+
+                    // If there is no call operator and the message is "[]" then
+                    // this is an aref expression, and we can transform it into
+                    // an aset expression.
+                    if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                        return UP(pm_index_and_write_node_create(parser, cast, &token, value));
+                    }
+
+                    // If this node cannot be writable, then we have an error.
+                    if (pm_call_node_writable_p(parser, cast)) {
+                        parse_write_name(parser, &cast->name);
+                    } else {
+                        pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
+                    }
+
+                    parse_call_operator_write(parser, cast, &token);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+                    return UP(pm_call_and_write_node_create(parser, cast, &token, value));
+                }
+                case PM_MULTI_WRITE_NODE: {
+                    parser_lex(parser);
+                    pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
+                    return node;
+                }
+                default:
+                    parser_lex(parser);
+
+                    // In this case we have an &&= sign, but we don't know what it's for.
+                    // We need to treat it as an error. For now, we'll mark it as an error
+                    // and just skip right past it.
+                    pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
+                    return node;
+            }
+        }
+        case PM_TOKEN_PIPE_PIPE_EQUAL: {
+            switch (PM_NODE_TYPE(node)) {
+                case PM_BACK_REFERENCE_READ_NODE:
+                case PM_NUMBERED_REFERENCE_READ_NODE:
+                    PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
+                PRISM_FALLTHROUGH
+                case PM_GLOBAL_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CLASS_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CONSTANT_PATH_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
+
+                    return parse_shareable_constant_write(parser, write);
+                }
+                case PM_CONSTANT_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return parse_shareable_constant_write(parser, write);
+                }
+                case PM_INSTANCE_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+                    pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
+
+                    pm_node_unreference(parser, node);
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_LOCAL_VARIABLE_READ_NODE: {
+                    if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
+                        pm_node_unreference(parser, node);
+                    }
+
+                    pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CALL_NODE: {
+                    pm_call_node_t *cast = (pm_call_node_t *) node;
+
+                    // If we have a vcall (a method with no arguments and no
+                    // receiver that could have been a local variable) then we
+                    // will transform it into a local variable write.
+                    if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
+                        pm_location_t *message_loc = &cast->message_loc;
+                        pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
+
+                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+                        parser_lex(parser);
+
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                        pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
+
+                        pm_node_destroy(parser, UP(cast));
+                        return result;
+                    }
+
+                    // Move past the token here so that we have already added
+                    // the local variable by this point.
+                    parser_lex(parser);
+
+                    // If there is no call operator and the message is "[]" then
+                    // this is an aref expression, and we can transform it into
+                    // an aset expression.
+                    if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                        return UP(pm_index_or_write_node_create(parser, cast, &token, value));
+                    }
+
+                    // If this node cannot be writable, then we have an error.
+                    if (pm_call_node_writable_p(parser, cast)) {
+                        parse_write_name(parser, &cast->name);
+                    } else {
+                        pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
+                    }
+
+                    parse_call_operator_write(parser, cast, &token);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+                    return UP(pm_call_or_write_node_create(parser, cast, &token, value));
+                }
+                case PM_MULTI_WRITE_NODE: {
+                    parser_lex(parser);
+                    pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
+                    return node;
+                }
+                default:
+                    parser_lex(parser);
+
+                    // In this case we have an ||= sign, but we don't know what it's for.
+                    // We need to treat it as an error. For now, we'll mark it as an error
+                    // and just skip right past it.
+                    pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
+                    return node;
+            }
+        }
+        case PM_TOKEN_AMPERSAND_EQUAL:
+        case PM_TOKEN_CARET_EQUAL:
+        case PM_TOKEN_GREATER_GREATER_EQUAL:
+        case PM_TOKEN_LESS_LESS_EQUAL:
+        case PM_TOKEN_MINUS_EQUAL:
+        case PM_TOKEN_PERCENT_EQUAL:
+        case PM_TOKEN_PIPE_EQUAL:
+        case PM_TOKEN_PLUS_EQUAL:
+        case PM_TOKEN_SLASH_EQUAL:
+        case PM_TOKEN_STAR_EQUAL:
+        case PM_TOKEN_STAR_STAR_EQUAL: {
+            switch (PM_NODE_TYPE(node)) {
+                case PM_BACK_REFERENCE_READ_NODE:
+                case PM_NUMBERED_REFERENCE_READ_NODE:
+                    PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
+                PRISM_FALLTHROUGH
+                case PM_GLOBAL_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CLASS_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CONSTANT_PATH_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
+
+                    return parse_shareable_constant_write(parser, write);
+                }
+                case PM_CONSTANT_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return parse_shareable_constant_write(parser, write);
+                }
+                case PM_INSTANCE_VARIABLE_READ_NODE: {
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+                    pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
+
+                    pm_node_unreference(parser, node);
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_LOCAL_VARIABLE_READ_NODE: {
+                    if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
+                        PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
+                        pm_node_unreference(parser, node);
+                    }
+
+                    pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
+                    parser_lex(parser);
+
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
+
+                    pm_node_destroy(parser, node);
+                    return result;
+                }
+                case PM_CALL_NODE: {
+                    parser_lex(parser);
+                    pm_call_node_t *cast = (pm_call_node_t *) node;
+
+                    // If we have a vcall (a method with no arguments and no
+                    // receiver that could have been a local variable) then we
+                    // will transform it into a local variable write.
+                    if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
+                        pm_location_t *message_loc = &cast->message_loc;
+                        pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
+
+                        pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                        pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
+
+                        pm_node_destroy(parser, UP(cast));
+                        return result;
+                    }
+
+                    // If there is no call operator and the message is "[]" then
+                    // this is an aref expression, and we can transform it into
+                    // an aset expression.
+                    if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
+                        pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                        return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
+                    }
+
+                    // If this node cannot be writable, then we have an error.
+                    if (pm_call_node_writable_p(parser, cast)) {
+                        parse_write_name(parser, &cast->name);
+                    } else {
+                        pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
+                    }
+
+                    parse_call_operator_write(parser, cast, &token);
+                    pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+                    return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
+                }
+                case PM_MULTI_WRITE_NODE: {
+                    parser_lex(parser);
+                    pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
+                    return node;
+                }
+                default:
+                    parser_lex(parser);
+
+                    // In this case we have an operator but we don't know what it's for.
+                    // We need to treat it as an error. For now, we'll mark it as an error
+                    // and just skip right past it.
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
+                    return node;
+            }
+        }
+        case PM_TOKEN_AMPERSAND_AMPERSAND:
+        case PM_TOKEN_KEYWORD_AND: {
+            parser_lex(parser);
+
+            pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_and_node_create(parser, node, &token, right));
+        }
+        case PM_TOKEN_KEYWORD_OR:
+        case PM_TOKEN_PIPE_PIPE: {
+            parser_lex(parser);
+
+            pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_or_node_create(parser, node, &token, right));
+        }
+        case PM_TOKEN_EQUAL_TILDE: {
+            // Note that we _must_ parse the value before adding the local
+            // variables in order to properly mirror the behavior of Ruby. For
+            // example,
+            //
+            //     /(?<foo>bar)/ =~ foo
+            //
+            // In this case, `foo` should be a method call and not a local yet.
+            parser_lex(parser);
+            pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+
+            // By default, we're going to create a call node and then return it.
+            pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
+            pm_node_t *result = UP(call);
+
+            // If the receiver of this =~ is a regular expression node, then we
+            // need to introduce local variables for it based on its named
+            // capture groups.
+            if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
+                // It's possible to have an interpolated regular expression node
+                // that only contains strings. This is because it can be split
+                // up by a heredoc. In this case we need to concat the unescaped
+                // strings together and then parse them as a regular expression.
+                pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
+
+                bool interpolated = false;
+                size_t total_length = 0;
+
+                pm_node_t *part;
+                PM_NODE_LIST_FOREACH(parts, index, part) {
+                    if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
+                        total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
+                    } else {
+                        interpolated = true;
+                        break;
+                    }
+                }
+
+                if (!interpolated && total_length > 0) {
+                    void *memory = xmalloc(total_length);
+                    if (!memory) abort();
+
+                    uint8_t *cursor = memory;
+                    PM_NODE_LIST_FOREACH(parts, index, part) {
+                        pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
+                        size_t length = pm_string_length(unescaped);
+
+                        memcpy(cursor, pm_string_source(unescaped), length);
+                        cursor += length;
+                    }
+
+                    pm_string_t owned;
+                    pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
+
+                    result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+                    pm_string_free(&owned);
+                }
+            } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
+                // If we have a regular expression node, then we can just parse
+                // the named captures directly off the unescaped string.
+                const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
+                result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+            }
+
+            return result;
+        }
+        case PM_TOKEN_UAMPERSAND:
+        case PM_TOKEN_USTAR:
+        case PM_TOKEN_USTAR_STAR:
+            // The only times this will occur are when we are in an error state,
+            // but we'll put them in here so that errors can propagate.
+        case PM_TOKEN_BANG_EQUAL:
+        case PM_TOKEN_BANG_TILDE:
+        case PM_TOKEN_EQUAL_EQUAL:
+        case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+        case PM_TOKEN_LESS_EQUAL_GREATER:
+        case PM_TOKEN_CARET:
+        case PM_TOKEN_PIPE:
+        case PM_TOKEN_AMPERSAND:
+        case PM_TOKEN_GREATER_GREATER:
+        case PM_TOKEN_LESS_LESS:
+        case PM_TOKEN_MINUS:
+        case PM_TOKEN_PLUS:
+        case PM_TOKEN_PERCENT:
+        case PM_TOKEN_SLASH:
+        case PM_TOKEN_STAR:
+        case PM_TOKEN_STAR_STAR: {
+            parser_lex(parser);
+            pm_token_t operator = parser->previous;
+            switch (PM_NODE_TYPE(node)) {
+                case PM_RESCUE_MODIFIER_NODE: {
+                    pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_AND_NODE: {
+                    pm_and_node_t *cast = (pm_and_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_OR_NODE: {
+                    pm_or_node_t *cast = (pm_or_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
+        }
+        case PM_TOKEN_GREATER:
+        case PM_TOKEN_GREATER_EQUAL:
+        case PM_TOKEN_LESS:
+        case PM_TOKEN_LESS_EQUAL: {
+            if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
+                PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
+            }
+
+            parser_lex(parser);
+            pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
+        }
+        case PM_TOKEN_AMPERSAND_DOT:
+        case PM_TOKEN_DOT: {
+            parser_lex(parser);
+            pm_token_t operator = parser->previous;
+            pm_arguments_t arguments = { 0 };
+
+            // This if statement handles the foo.() syntax.
+            if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+                parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
+                return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
+            }
+
+            switch (PM_NODE_TYPE(node)) {
+                case PM_RESCUE_MODIFIER_NODE: {
+                    pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_AND_NODE: {
+                    pm_and_node_t *cast = (pm_and_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                case PM_OR_NODE: {
+                    pm_or_node_t *cast = (pm_or_node_t *) node;
+                    if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
+                        PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            pm_token_t message;
+
+            switch (parser->current.type) {
+                case PM_CASE_OPERATOR:
+                case PM_CASE_KEYWORD:
+                case PM_TOKEN_CONSTANT:
+                case PM_TOKEN_IDENTIFIER:
+                case PM_TOKEN_METHOD_NAME: {
+                    parser_lex(parser);
+                    message = parser->previous;
+                    break;
+                }
+                default: {
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
+                    message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                }
+            }
+
+            parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+            pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
+
+            if (
+                (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
+                arguments.arguments == NULL &&
+                arguments.opening_loc.start == NULL &&
+                match1(parser, PM_TOKEN_COMMA)
+            ) {
+                return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            } else {
+                return UP(call);
+            }
+        }
+        case PM_TOKEN_DOT_DOT:
+        case PM_TOKEN_DOT_DOT_DOT: {
+            parser_lex(parser);
+
+            pm_node_t *right = NULL;
+            if (token_begins_expression_p(parser->current.type)) {
+                right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+            }
+
+            return UP(pm_range_node_create(parser, node, &token, right));
+        }
+        case PM_TOKEN_KEYWORD_IF_MODIFIER: {
+            pm_token_t keyword = parser->current;
+            parser_lex(parser);
+
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
+        }
+        case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
+            pm_token_t keyword = parser->current;
+            parser_lex(parser);
+
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
+        }
+        case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
+            parser_lex(parser);
+            pm_statements_node_t *statements = pm_statements_node_create(parser);
+            pm_statements_node_body_append(parser, statements, node, true);
+
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
+        }
+        case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
+            parser_lex(parser);
+            pm_statements_node_t *statements = pm_statements_node_create(parser);
+            pm_statements_node_body_append(parser, statements, node, true);
+
+            pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+            return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
+        }
+        case PM_TOKEN_QUESTION_MARK: {
+            context_push(parser, PM_CONTEXT_TERNARY);
+            pm_node_list_t current_block_exits = { 0 };
+            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+            pm_token_t qmark = parser->current;
+            parser_lex(parser);
+
+            pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
+
+            if (parser->recovering) {
+                // If parsing the true expression of this ternary resulted in a syntax
+                // error that we can recover from, then we're going to put missing nodes
+                // and tokens into the remaining places. We want to be sure to do this
+                // before the `expect` function call to make sure it doesn't
+                // accidentally move past a ':' token that occurs after the syntax
+                // error.
+                pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+                pm_node_t *false_expression = UP(pm_missing_node_create(parser, colon.start, colon.end));
+
+                context_pop(parser);
+                pop_block_exits(parser, previous_block_exits);
+                pm_node_list_free(&current_block_exits);
+
+                return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
+            }
+
+            accept1(parser, PM_TOKEN_NEWLINE);
+            expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
+
+            pm_token_t colon = parser->previous;
+            pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
+
+            context_pop(parser);
+            pop_block_exits(parser, previous_block_exits);
+            pm_node_list_free(&current_block_exits);
+
+            return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
+        }
+        case PM_TOKEN_COLON_COLON: {
+            parser_lex(parser);
+            pm_token_t delimiter = parser->previous;
+
+            switch (parser->current.type) {
+                case PM_TOKEN_CONSTANT: {
+                    parser_lex(parser);
+                    pm_node_t *path;
+
+                    if (
+                        (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
+                        (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
+                    ) {
+                        // If we have a constant immediately following a '::' operator, then
+                        // this can either be a constant path or a method call, depending on
+                        // what follows the constant.
+                        //
+                        // If we have parentheses, then this is a method call. That would
+                        // look like Foo::Bar().
+                        pm_token_t message = parser->previous;
+                        pm_arguments_t arguments = { 0 };
+
+                        parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+                        path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
+                    } else {
+                        // Otherwise, this is a constant path. That would look like Foo::Bar.
+                        path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
+                    }
+
+                    // If this is followed by a comma then it is a multiple assignment.
+                    if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                        return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                    }
+
+                    return path;
+                }
+                case PM_CASE_OPERATOR:
+                case PM_CASE_KEYWORD:
+                case PM_TOKEN_IDENTIFIER:
+                case PM_TOKEN_METHOD_NAME: {
+                    parser_lex(parser);
+                    pm_token_t message = parser->previous;
+
+                    // If we have an identifier following a '::' operator, then it is for
+                    // sure a method call.
+                    pm_arguments_t arguments = { 0 };
+                    parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+                    pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
+
+                    // If this is followed by a comma then it is a multiple assignment.
+                    if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                        return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                    }
+
+                    return UP(call);
+                }
+                case PM_TOKEN_PARENTHESIS_LEFT: {
+                    // If we have a parenthesis following a '::' operator, then it is the
+                    // method call shorthand. That would look like Foo::(bar).
+                    pm_arguments_t arguments = { 0 };
+                    parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
+
+                    return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
+                }
+                default: {
+                    expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+                    return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
+                }
+            }
+        }
+        case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
+            context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+            parser_lex(parser);
+            accept1(parser, PM_TOKEN_NEWLINE);
+
+            pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+            context_pop(parser);
+
+            return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
+        }
+        case PM_TOKEN_BRACKET_LEFT: {
+            parser_lex(parser);
+
+            pm_arguments_t arguments = { 0 };
+            arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+
+            if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
+                pm_accepts_block_stack_push(parser, true);
+                parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
+                pm_accepts_block_stack_pop(parser);
+                expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
+            }
+
+            arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+
+            // If we have a comma after the closing bracket then this is a multiple
+            // assignment and we should parse the targets.
+            if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
+                pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
+                return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+            }
+
+            // If we're at the end of the arguments, we can now check if there is a
+            // block node that starts with a {. If there is, then we can parse it and
+            // add it to the arguments.
+            pm_block_node_t *block = NULL;
+            if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
+                block = parse_block(parser, (uint16_t) (depth + 1));
+                pm_arguments_validate_block(parser, &arguments, block);
+            } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
+                block = parse_block(parser, (uint16_t) (depth + 1));
+            }
+
+            if (block != NULL) {
+                if (arguments.block != NULL) {
+                    pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
+                    if (arguments.arguments == NULL) {
+                        arguments.arguments = pm_arguments_node_create(parser);
+                    }
+                    pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
+                }
+
+                arguments.block = UP(block);
+            }
+
+            return UP(pm_call_node_aref_create(parser, node, &arguments));
+        }
+        case PM_TOKEN_KEYWORD_IN: {
+            bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+            parser->pattern_matching_newlines = true;
+
+            pm_token_t operator = parser->current;
+            parser->command_start = false;
+            lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+            parser_lex(parser);
+
+            pm_constant_id_list_t captures = { 0 };
+            pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
+            parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+            pm_constant_id_list_free(&captures);
+
+            return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
+        }
+        case PM_TOKEN_EQUAL_GREATER: {
+            bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+            parser->pattern_matching_newlines = true;
+
+            pm_token_t operator = parser->current;
+            parser->command_start = false;
+            lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+            parser_lex(parser);
+
+            pm_constant_id_list_t captures = { 0 };
+            pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
+
+            parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+            pm_constant_id_list_free(&captures);
+
+            return UP(pm_match_required_node_create(parser, node, pattern, &operator));
+        }
+        default:
+            assert(false && "unreachable");
+            return NULL;
+    }
+}
+
+#undef PM_PARSE_PATTERN_SINGLE
+#undef PM_PARSE_PATTERN_TOP
+#undef PM_PARSE_PATTERN_MULTI
+
+/**
+ * Determine whether a call node is written in "command" form: it passes
+ * something to the method (arguments and/or a block argument) but was written
+ * without parentheses and without an attached block.
+ */
+static inline bool
+pm_call_node_command_p(const pm_call_node_t *node) {
+    // A recorded opening location rules out the command form immediately.
+    if (node->opening_loc.start != NULL) return false;
+
+    // The only thing permitted in the block slot of a command is a block
+    // argument node; any other kind of block disqualifies the call.
+    if (node->block != NULL && !PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) return false;
+
+    // The block slot is now known to be empty or a block argument, so the
+    // call is a command as long as it actually passes something.
+    return node->arguments != NULL || node->block != NULL;
+}
+
+/**
+ * Parse an expression at the given point of the parser using the given binding
+ * power to parse subsequent chains. If this function finds a syntax error, it
+ * will append the error message to the parser's error list.
+ *
+ * The leading part of the expression is parsed by parse_expression_prefix.
+ * Infix operators are then folded onto the result by parse_expression_infix
+ * for as long as the current token is a binary operator whose left binding
+ * power is at least binding_power.
+ *
+ * binding_power, accepts_command_call, accepts_label, diag_id, and depth are
+ * all forwarded to parse_expression_prefix. accepts_command_call is also
+ * forwarded to parse_expression_infix, and it is cleared inside the loop once
+ * the parsed node can no longer continue a method chain.
+ *
+ * If depth has reached PRISM_DEPTH_MAXIMUM, PM_ERR_NESTING_TOO_DEEP is
+ * recorded against the current token and a missing node spanning that token is
+ * returned without parsing anything.
+ *
+ * Consumers of this function should always check parser->recovering to
+ * determine if they need to perform additional cleanup.
+ */
+static pm_node_t *
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+    // Refuse to go any deeper once the nesting limit has been reached.
+    if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
+        pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
+        return UP(pm_missing_node_create(parser, parser->current.start, parser->current.end));
+    }
+
+    pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+
+    // Depending on the kind of node the prefix produced, we may have to stop
+    // before even looking for an infix operator.
+    switch (PM_NODE_TYPE(node)) {
+        case PM_MISSING_NODE:
+            // If we found a syntax error, then the type of node returned by
+            // parse_expression_prefix is going to be a missing node.
+            return node;
+        case PM_PRE_EXECUTION_NODE:
+        case PM_POST_EXECUTION_NODE:
+        case PM_ALIAS_GLOBAL_VARIABLE_NODE:
+        case PM_ALIAS_METHOD_NODE:
+        case PM_MULTI_WRITE_NODE:
+        case PM_UNDEF_NODE:
+            // These expressions are statements, and cannot be followed by
+            // operators (except modifiers).
+            if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+                return node;
+            }
+            break;
+        case PM_CALL_NODE:
+            // If we have a call node, then we need to check if it looks like a
+            // method call without parentheses that contains arguments. If it
+            // does, then it has different rules for parsing infix operators,
+            // namely that it only accepts composition (and/or) and modifiers
+            // (if/unless/etc.).
+            if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
+                return node;
+            }
+            break;
+        case PM_SYMBOL_NODE:
+            // If we have a symbol node that is being parsed as a label, then we
+            // need to immediately return, because there should never be an
+            // infix operator following this node.
+            if (pm_symbol_node_label_p(node)) {
+                return node;
+            }
+            break;
+        default:
+            break;
+    }
+
+    // Otherwise we'll look and see if the next token can be parsed as an infix
+    // operator. If it can, then we'll parse it using parse_expression_infix.
+    pm_binding_powers_t current_binding_powers;
+    pm_token_type_t current_token_type;
+
+    // The loop condition uses the comma operator: on every iteration it first
+    // re-reads the current token type and looks up its binding powers, and
+    // only then tests whether that token is a binary operator that binds at
+    // least as tightly as binding_power.
+    while (
+        current_token_type = parser->current.type,
+        current_binding_powers = pm_binding_powers[current_token_type],
+        binding_power <= current_binding_powers.left &&
+        current_binding_powers.binary
+     ) {
+        // The operator's right binding power is handed to the infix parser,
+        // along with the binding power this call was given.
+        node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
+
+        if (context_terminator(parser->current_context->context, &parser->current)) {
+            // If this token terminates the current context, then we need to
+            // stop parsing the expression, as it has become a statement.
+            return node;
+        }
+
+        switch (PM_NODE_TYPE(node)) {
+            case PM_MULTI_WRITE_NODE:
+                // Multi-write nodes are statements, and cannot be followed by
+                // operators except modifiers.
+                if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+                    return node;
+                }
+                break;
+            case PM_CLASS_VARIABLE_WRITE_NODE:
+            case PM_CONSTANT_PATH_WRITE_NODE:
+            case PM_CONSTANT_WRITE_NODE:
+            case PM_GLOBAL_VARIABLE_WRITE_NODE:
+            case PM_INSTANCE_VARIABLE_WRITE_NODE:
+            case PM_LOCAL_VARIABLE_WRITE_NODE:
+                // These expressions are statements, by virtue of the right-hand
+                // side of their write being an implicit array.
+                if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+                    return node;
+                }
+                break;
+            case PM_CALL_NODE:
+                // These expressions are also statements, by virtue of the
+                // right-hand side of the expression (i.e., the last argument to
+                // the call node) being an implicit array.
+                if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
+                    return node;
+                }
+                break;
+            default:
+                break;
+        }
+
+        // If the operator is nonassoc and we should not be able to parse the
+        // upcoming infix operator, break.
+        if (current_binding_powers.nonassoc) {
+            // If this is a non-assoc operator and we are about to parse the
+            // exact same operator, then we need to add an error.
+            if (match1(parser, current_token_type)) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+                break;
+            }
+
+            // If this is an endless range, then we need to reject a couple of
+            // additional operators because it violates the normal operator
+            // precedence rules. Those patterns are:
+            //
+            //     1.. & 2
+            //     1.. * 2
+            //
+            // The PM_TOKEN_DOT and PM_TOKEN_AMPERSAND_DOT tokens are rejected
+            // with the same error when they follow an endless range.
+            if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
+                if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+                    break;
+                }
+
+                // Any upcoming token whose left binding power is at least
+                // PM_BINDING_POWER_TERM also ends the expression here, without
+                // an error.
+                if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
+                    break;
+                }
+            } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
+                // For every other non-assoc operator, stop if the upcoming
+                // token binds at least as tightly as the operator that was
+                // just parsed.
+                break;
+            }
+        }
+
+        if (accepts_command_call) {
+            // A command-style method call is only accepted on method chains.
+            // Thus, we check whether the parsed node can continue method chains.
+            // The method chain can continue if the parsed node is one of the following five kinds:
+            // (1) index access: foo[1]
+            // (2) attribute access: foo.bar
+            // (3) method call with parenthesis: foo.bar(1)
+            // (4) method call with a block: foo.bar do end
+            // (5) constant path: foo::Bar
+            switch (node->type) {
+                case PM_CALL_NODE: {
+                    pm_call_node_t *cast = (pm_call_node_t *)node;
+                    // Command calls are turned off only when the call matches
+                    // none of the shapes (1) through (4).
+                    if (
+                        // (1) foo[1]
+                        !(
+                            cast->call_operator_loc.start == NULL &&
+                            cast->message_loc.start != NULL &&
+                            cast->message_loc.start[0] == '[' &&
+                            cast->message_loc.end[-1] == ']'
+                        ) &&
+                        // (2) foo.bar
+                        !(
+                            cast->call_operator_loc.start != NULL &&
+                            cast->arguments == NULL &&
+                            cast->block == NULL &&
+                            cast->opening_loc.start == NULL
+                        ) &&
+                        // (3) foo.bar(1)
+                        !(
+                            cast->call_operator_loc.start != NULL &&
+                            cast->opening_loc.start != NULL
+                        ) &&
+                        // (4) foo.bar do end
+                        !(
+                            cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
+                        )
+                     ) {
+                        accepts_command_call = false;
+                    }
+                    break;
+                }
+                // (5) foo::Bar
+                case PM_CONSTANT_PATH_NODE:
+                    break;
+                default:
+                    // Any other kind of node cannot continue a method chain.
+                    accepts_command_call = false;
+                    break;
+            }
+        }
+    }
+
+    return node;
+}
+
+/**
+ * Apply the AST rewrites implied by the ruby -p, -n, -a, and -l command line
+ * options to the given top-level statements and return the statements node
+ * that should be used in their place. The incoming statements may be NULL; a
+ * statements node is created on demand whenever something has to be appended
+ * or prepended to it.
+ */
+static pm_statements_node_t *
+wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
+    // -p: append `print($_)` after the existing statements.
+    if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
+        if (statements == NULL) statements = pm_statements_node_create(parser);
+
+        pm_arguments_node_t *print_arguments = pm_arguments_node_create(parser);
+        pm_node_t *last_line = UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)));
+        pm_arguments_node_arguments_append(print_arguments, last_line);
+
+        pm_node_t *print_call = UP(pm_call_node_fcall_synthesized_create(parser, print_arguments, pm_parser_constant_id_constant(parser, "print", 5)));
+        pm_statements_node_body_append(parser, statements, print_call, true);
+    }
+
+    // Everything below only applies when -n is set.
+    if (!(PM_PARSER_COMMAND_LINE_OPTION_N(parser))) return statements;
+
+    // -a: prepend `$F = $_.split($;)` before the existing statements.
+    if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
+        if (statements == NULL) statements = pm_statements_node_create(parser);
+
+        pm_arguments_node_t *split_arguments = pm_arguments_node_create(parser);
+        pm_node_t *field_separator = UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)));
+        pm_arguments_node_arguments_append(split_arguments, field_separator);
+
+        pm_node_t *line = UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)));
+        pm_node_t *split_call = UP(pm_call_node_call_synthesized_create(parser, line, "split", split_arguments));
+        pm_node_t *fields_write = UP(pm_global_variable_write_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$F", 2), split_call));
+
+        pm_statements_node_body_prepend(statements, fields_write);
+    }
+
+    // Build the arguments for the `gets($/)` call that drives the loop.
+    pm_arguments_node_t *gets_arguments = pm_arguments_node_create(parser);
+    pm_node_t *record_separator = UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)));
+    pm_arguments_node_arguments_append(gets_arguments, record_separator);
+
+    // -l: pass `chomp: true` as a keyword argument to gets.
+    if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
+        pm_token_t not_provided = { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
+        pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
+
+        // The key and value nodes are deliberately created inside a single
+        // call expression, exactly as before.
+        pm_node_t *chomp_pair = UP(pm_assoc_node_create(
+            parser,
+            UP(pm_symbol_node_synthesized_create(parser, "chomp")),
+            &not_provided,
+            UP(pm_true_node_synthesized_create(parser))
+        ));
+        pm_keyword_hash_node_elements_append(keywords, chomp_pair);
+
+        pm_arguments_node_arguments_append(gets_arguments, UP(keywords));
+        pm_node_flag_set(UP(gets_arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
+    }
+
+    // Finally, wrap whatever statements we have in `while gets(...)`, and make
+    // that loop the only statement of a fresh statements node.
+    pm_statements_node_t *wrapped = pm_statements_node_create(parser);
+    pm_node_t *gets_call = UP(pm_call_node_fcall_synthesized_create(parser, gets_arguments, pm_parser_constant_id_constant(parser, "gets", 4)));
+    pm_node_t *loop = UP(pm_while_node_synthesized_create(parser, gets_call, statements));
+    pm_statements_node_body_append(parser, wrapped, loop, true);
+
+    return wrapped;
+}
+
+/**
+ * Parse an entire program and return the resulting top-level program node.
+ */
+static pm_node_t *
+parse_program(pm_parser_t *parser) {
+    // Only push a top-level scope when none is in place yet. One may already
+    // exist if we are parsing an eval for which the caller supplied scopes.
+    if (parser->current_scope == NULL) pm_parser_scope_push(parser, true);
+
+    pm_node_list_t block_exits = { 0 };
+    pm_node_list_t *outer_block_exits = push_block_exits(parser, &block_exits);
+
+    parser_lex(parser);
+    pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
+
+    // Everywhere else, all statements but the last one get the void statement
+    // check, so outside of evals the final top-level statement has to be
+    // checked explicitly here.
+    if (!parser->parsing_eval && statements != NULL) {
+        assert(statements->body.size > 0);
+        pm_node_t *last_statement = statements->body.nodes[statements->body.size - 1];
+        pm_void_statement_check(parser, last_statement);
+    }
+
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
+    pm_parser_scope_pop(parser);
+
+    // When neither -p nor -n was given, the block exits collected while
+    // parsing are flushed. Otherwise the statements are rewritten by
+    // wrap_statements according to the command line options.
+    if ((parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) == 0) {
+        flush_block_exits(parser, outer_block_exits);
+    } else {
+        statements = wrap_statements(parser, statements);
+    }
+
+    pm_node_list_free(&block_exits);
+
+    // An empty file still goes through the whole parse so that comments and
+    // the like are gathered. In that case there are no statements, so create
+    // an empty node located at the very start of the source.
+    if (statements == NULL) {
+        statements = pm_statements_node_create(parser);
+        pm_statements_node_location_set(statements, parser->start, parser->start);
+    }
+
+    return UP(pm_program_node_create(parser, &locals, statements));
+}
+
+/******************************************************************************/
+/* External functions                                                         */
+/******************************************************************************/
+
+/**
+ * A vendored version of strnstr that is used to find a substring within a
+ * string with a given length. This function is used to search for the Ruby
+ * engine name within a shebang when the -x option is passed to Ruby.
+ *
+ * Unlike strnstr, the search does not stop at a NUL byte in big: exactly
+ * big_length bytes are considered, so big may contain embedded NUL bytes.
+ * little must be a NUL-terminated string.
+ *
+ * Returns a pointer to the first occurrence of little that lies entirely
+ * within the first big_length bytes of big, or NULL if there is none. As with
+ * strnstr, an empty little matches at the start of big.
+ */
+static const char *
+pm_strnstr(const char *big, const char *little, size_t big_length) {
+    size_t little_length = strlen(little);
+
+    // An empty needle trivially matches at the start. Handling it here also
+    // keeps the loop below from inspecting the byte at big[big_length].
+    if (little_length == 0) return big;
+
+    // If the needle is longer than the haystack there cannot be a match, and
+    // computing the last candidate position below would form a pointer that
+    // lies before the start of big.
+    if (little_length > big_length) return NULL;
+
+    // The last position at which the needle could still fit.
+    const char *max = big + (big_length - little_length);
+
+    for (; big <= max; big++) {
+        // Compare the first byte before paying for the full memcmp.
+        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
+    }
+
+    return NULL;
+}
+
+#ifdef _WIN32
+#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
+#else
+/**
+ * Add a PM_WARN_SHEBANG_CARRIAGE_RETURN warning covering the given shebang
+ * (one that has been found to include "ruby") when it ends with a carriage
+ * return followed by a newline, as that can cause problems on some platforms.
+ * On Windows this is compiled out entirely.
+ */
+static void
+pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
+    // Lines of two bytes or fewer are never warned about.
+    if (length <= 2) return;
+
+    // Look at the final two bytes of the line.
+    const uint8_t *tail = start + (length - 2);
+    if (tail[0] == '\r' && tail[1] == '\n') {
+        pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
+    }
+}
+#endif
+
+/**
+ * Process the shebang when initializing the parser: locate the first " -"
+ * within the given engine text, hand everything after that space to the
+ * shebang callback together with a copy of the options, and then apply the
+ * encoding, command line, and frozen string literal settings from that copy
+ * to the parser.
+ *
+ * This function assumes that the shebang_callback option has already been
+ * checked for nullability.
+ */
+static void
+pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
+    const char *separator = pm_strnstr(engine, " -", length);
+
+    // Without any switches there is nothing for the callback to process.
+    if (separator == NULL) return;
+
+    // Skip over the space so that the callback receives text starting at '-'.
+    size_t skipped = ((size_t) (separator - engine)) + 1;
+
+    // The callback works on a copy, leaving the caller's options untouched.
+    pm_options_t shebang_options = *options;
+    options->shebang_callback(
+        &shebang_options,
+        (const uint8_t *) (separator + 1),
+        length - skipped,
+        options->shebang_callback_data
+    );
+
+    // If the copy now carries an encoding name, treat it the same way as the
+    // value of an encoding magic comment.
+    size_t encoding_length = pm_string_length(&shebang_options.encoding);
+    if (encoding_length > 0) {
+        const uint8_t *encoding_start = pm_string_source(&shebang_options.encoding);
+        parser_lex_magic_comment_encoding_value(parser, encoding_start, encoding_start + encoding_length);
+    }
+
+    parser->command_line = shebang_options.command_line;
+    parser->frozen_string_literal = shebang_options.frozen_string_literal;
+}
+
+/**
+ * Initialize a parser with the given start and end pointers.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+    assert(source != NULL);
+
+    *parser = (pm_parser_t) {
+        .node_id = 0,
+        .lex_state = PM_LEX_STATE_BEG,
+        .enclosure_nesting = 0,
+        .lambda_enclosure_nesting = -1,
+        .brace_nesting = 0,
+        .do_loop_stack = 0,
+        .accepts_block_stack = 0,
+        .lex_modes = {
+            .index = 0,
+            .stack = {{ .mode = PM_LEX_DEFAULT }},
+            .current = &parser->lex_modes.stack[0],
+        },
+        .start = source,
+        .end = source + size,
+        .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
+        .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
+        .next_start = NULL,
+        .heredoc_end = NULL,
+        .data_loc = { .start = NULL, .end = NULL },
+        .comment_list = { 0 },
+        .magic_comment_list = { 0 },
+        .warning_list = { 0 },
+        .error_list = { 0 },
+        .current_scope = NULL,
+        .current_context = NULL,
+        .encoding = PM_ENCODING_UTF_8_ENTRY,
+        .encoding_changed_callback = NULL,
+        .encoding_comment_start = source,
+        .lex_callback = NULL,
+        .filepath = { 0 },
+        .constant_pool = { 0 },
+        .newline_list = { 0 },
+        .integer_base = 0,
+        .current_string = PM_STRING_EMPTY,
+        .start_line = 1,
+        .explicit_encoding = NULL,
+        .command_line = 0,
+        .parsing_eval = false,
+        .partial_script = false,
+        .command_start = true,
+        .recovering = false,
+        .encoding_locked = false,
+        .encoding_changed = false,
+        .pattern_matching_newlines = false,
+        .in_keyword_arg = false,
+        .current_block_exits = NULL,
+        .semantic_token_seen = false,
+        .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
+        .current_regular_expression_ascii_only = false,
+        .warn_mismatched_indentation = true
+    };
+
+    // Initialize the constant pool. We're going to completely guess as to the
+    // number of constants that we'll need based on the size of the input. The
+    // ratio we chose here is actually less arbitrary than you might think.
+    //
+    // We took ~50K Ruby files and measured the size of the file versus the
+    // number of constants that were found in those files. Then we found the
+    // average and standard deviation of the ratios of constants/bytesize. Then
+    // we added 1.34 standard deviations to the average to get a ratio that
+    // would fit 75% of the files (for a two-tailed distribution). This works
+    // because there was about a 0.77 correlation and the distribution was
+    // roughly normal.
+    //
+    // This ratio will need to change if we add more constants to the constant
+    // pool for another node type.
+    uint32_t constant_size = ((uint32_t) size) / 95;
+    pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
+
+    // Initialize the newline list. Similar to the constant pool, we're going to
+    // guess at the number of newlines that we'll need based on the size of the
+    // input.
+    size_t newline_size = size / 22;
+    pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
+
+    // If options were provided to this parse, establish them here.
+    if (options != NULL) {
+        // filepath option
+        parser->filepath = options->filepath;
+
+        // line option
+        parser->start_line = options->line;
+
+        // encoding option
+        size_t encoding_length = pm_string_length(&options->encoding);
+        if (encoding_length > 0) {
+            const uint8_t *encoding_source = pm_string_source(&options->encoding);
+            parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
+        }
+
+        // encoding_locked option
+        parser->encoding_locked = options->encoding_locked;
+
+        // frozen_string_literal option
+        parser->frozen_string_literal = options->frozen_string_literal;
+
+        // command_line option
+        parser->command_line = options->command_line;
+
+        // version option
+        parser->version = options->version;
+
+        // partial_script
+        parser->partial_script = options->partial_script;
+
+        // scopes option
+        parser->parsing_eval = options->scopes_count > 0;
+        if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
+
+        for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
+            const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
+            pm_parser_scope_push(parser, scope_index == 0);
+
+            // Scopes given from the outside are not allowed to have numbered
+            // parameters.
+            parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
+
+            for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
+                const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
+
+                const uint8_t *source = pm_string_source(local);
+                size_t length = pm_string_length(local);
+
+                void *allocated = xmalloc(length);
+                if (allocated == NULL) continue;
+
+                memcpy(allocated, source, length);
+                pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
+            }
+        }
+    }
+
+    // Now that we have established the user-provided options, check if
+    // a version was given and parse as the latest version otherwise.
+    if (parser->version == PM_OPTIONS_VERSION_UNSET) {
+        parser->version = PM_OPTIONS_VERSION_LATEST;
+    }
+
+    pm_accepts_block_stack_push(parser, true);
+
+    // Skip past the UTF-8 BOM if it exists.
+    if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
+        parser->current.end += 3;
+        parser->encoding_comment_start += 3;
+
+        if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+            parser->encoding = PM_ENCODING_UTF_8_ENTRY;
+            if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
+        }
+    }
+
+    // If the -x command line flag is set, or the first shebang of the file does
+    // not include "ruby", then we'll search for a shebang that does include
+    // "ruby" and start parsing from there.
+    bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
+
+    // If the first two bytes of the source are a shebang, then we will do a bit
+    // of extra processing.
+    //
+    // First, we'll indicate that the encoding comment is at the end of the
+    // shebang. This means that when a shebang is present the encoding comment
+    // can begin on the second line.
+    //
+    // Second, we will check if the shebang includes "ruby". If it does, then
+    // we will start parsing from there. We will also potentially warn the
+    // user if there is a carriage return at the end of the shebang. We will
+    // also potentially call the shebang callback if this is the main script to
+    // allow the caller to parse the shebang and find any command-line options.
+    // If the shebang does not include "ruby" and this is the main script being
+    // parsed, then we will start searching the file for a shebang that does
+    // contain "ruby" as if -x were passed on the command line.
+    const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+    size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
+
+    if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
+        const char *engine;
+
+        if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
+            if (newline != NULL) {
+                parser->encoding_comment_start = newline + 1;
+
+                if (options == NULL || options->main_script) {
+                    pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
+                }
+            }
+
+            if (options != NULL && options->main_script && options->shebang_callback != NULL) {
+                pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
+            }
+
+            search_shebang = false;
+        } else if (options != NULL && options->main_script && !parser->parsing_eval) {
+            search_shebang = true;
+        }
+    }
+
+    // Here we're going to find the first shebang that includes "ruby" and start
+    // parsing from there.
+    if (search_shebang) {
+        // If a shebang that includes "ruby" is not found, then we're going to
+        // add a load error to the list of errors on the parser.
+        bool found_shebang = false;
+
+        // This is going to point to the start of each line as we check it.
+        // We'll maintain a moving window looking at each line as they come.
+        const uint8_t *cursor = parser->start;
+
+        // The newline pointer points to the end of the current line that we're
+        // considering. If it is NULL, then we're at the end of the file.
+        const uint8_t *newline = next_newline(cursor, parser->end - cursor);
+
+        while (newline != NULL) {
+            pm_newline_list_append(&parser->newline_list, newline);
+
+            cursor = newline + 1;
+            newline = next_newline(cursor, parser->end - cursor);
+
+            size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
+            if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
+                const char *engine;
+                if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
+                    found_shebang = true;
+
+                    if (newline != NULL) {
+                        pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
+                        parser->encoding_comment_start = newline + 1;
+                    }
+
+                    if (options != NULL && options->shebang_callback != NULL) {
+                        pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
+                    }
+
+                    break;
+                }
+            }
+        }
+
+        if (found_shebang) {
+            parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
+            parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
+        } else {
+            pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
+            pm_newline_list_clear(&parser->newline_list);
+        }
+    }
+
+    // The encoding comment can start after any amount of inline whitespace, so
+    // here we'll advance it to the first non-inline-whitespace character so
+    // that it is ready for future comparisons.
+    parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
+}
+
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ *
+ * The callback is stored directly on the parser, so only one callback can be
+ * registered at a time; calling this again replaces the previous one.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
+    parser->encoding_changed_callback = callback;
+}
+
+/**
+ * Release every comment node held in the given list.
+ *
+ * Only the nodes are freed. The list structure itself is not modified, so it
+ * must not be traversed again after this call.
+ */
+static inline void
+pm_comment_list_free(pm_list_t *list) {
+    pm_list_node_t *current = list->head;
+
+    while (current != NULL) {
+        // Remember the successor before the node is released.
+        pm_list_node_t *following = current->next;
+
+        xfree((pm_comment_t *) current);
+        current = following;
+    }
+}
+
+/**
+ * Release every magic comment node held in the given list.
+ *
+ * Only the nodes are freed. The list structure itself is not modified, so it
+ * must not be traversed again after this call.
+ */
+static inline void
+pm_magic_comment_list_free(pm_list_t *list) {
+    pm_list_node_t *current = list->head;
+
+    while (current != NULL) {
+        // Remember the successor before the node is released.
+        pm_list_node_t *following = current->next;
+
+        xfree((pm_magic_comment_t *) current);
+        current = following;
+    }
+}
+
+/**
+ * Free any memory associated with the given parser.
+ *
+ * This releases the filepath string, both diagnostic lists (errors and
+ * warnings), the comment and magic comment lists, the constant pool, and the
+ * newline list, and then pops any scopes and lex modes that are still
+ * outstanding. The parser struct itself is not freed here.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_parser_free(pm_parser_t *parser) {
+    pm_string_free(&parser->filepath);
+    pm_diagnostic_list_free(&parser->error_list);
+    pm_diagnostic_list_free(&parser->warning_list);
+    pm_comment_list_free(&parser->comment_list);
+    pm_magic_comment_list_free(&parser->magic_comment_list);
+    pm_constant_pool_free(&parser->constant_pool);
+    pm_newline_list_free(&parser->newline_list);
+
+    while (parser->current_scope != NULL) {
+        // Normally, popping the scope doesn't free the locals since it is
+        // assumed that ownership has transferred to the AST. However if we have
+        // scopes while we're freeing the parser, it's likely they came from
+        // eval scopes and we need to free them explicitly here.
+        pm_parser_scope_pop(parser);
+    }
+
+    // NOTE(review): only lex modes at index PM_LEX_STACK_SIZE and above are
+    // popped here, presumably because those are the entries that were
+    // allocated separately rather than stored inline in the parser -- confirm
+    // against lex_mode_push/lex_mode_pop.
+    while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
+        lex_mode_pop(parser);
+    }
+}
+
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ *
+ * This is the public entry point; it simply forwards to parse_program().
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t *
+pm_parse(pm_parser_t *parser) {
+    return parse_program(parser);
+}
+
+/**
+ * Read from the stream into the buffer until the fgets callback returns NULL
+ * or the feof callback reports that the stream is exhausted. If the last read
+ * line from the stream matches an __END__ marker, then halt and return false,
+ * otherwise return true.
+ */
+static bool
+pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof) {
+#define LINE_SIZE 4096
+    char line[LINE_SIZE];
+
+    // Before every read the scratch line is filled with newlines. Assuming the
+    // callback behaves like fgets, it writes the bytes it read followed by a
+    // NUL terminator, so afterwards the last byte that is not '\n' is that
+    // terminator. This recovers the number of bytes read without relying on
+    // strlen, which would stop early at a NUL byte embedded in the input.
+    while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
+        // Scan backwards over the untouched '\n' padding. When the loop stops,
+        // length is one past the index of the last non-newline byte.
+        size_t length = LINE_SIZE;
+        while (length > 0 && line[length - 1] == '\n') length--;
+
+        if (length == LINE_SIZE) {
+            // If we read a line that is the maximum size and it doesn't end
+            // with a newline, then we'll just append it to the buffer and
+            // continue reading.
+            //
+            // The decrement drops the final byte of the scratch line (the
+            // terminator) from what gets appended. Note that this path skips
+            // both the __END__ check and the feof check below.
+            length--;
+            pm_buffer_append_string(buffer, line, length);
+            continue;
+        }
+
+        // Append the line to the buffer. The decrement excludes the
+        // terminator, so length is now the number of bytes that were read.
+        length--;
+        pm_buffer_append_string(buffer, line, length);
+
+        // Check if the line matches the __END__ marker. If it does, then stop
+        // reading and return false. In most circumstances, this means we should
+        // stop reading from the stream so that the DATA constant can pick it
+        // up.
+        //
+        // Three spellings are recognized: the bare marker with no line ending,
+        // the marker followed by "\n", and the marker followed by "\r\n".
+        switch (length) {
+            case 7:
+                if (strncmp(line, "__END__", 7) == 0) return false;
+                break;
+            case 8:
+                if (strncmp(line, "__END__\n", 8) == 0) return false;
+                break;
+            case 9:
+                if (strncmp(line, "__END__\r\n", 9) == 0) return false;
+                break;
+        }
+
+        // Ask the caller-provided feof callback whether the stream has been
+        // exhausted, and stop reading if it has.
+        if (stream_feof(stream)) {
+            break;
+        }
+    }
+
+    return true;
+#undef LINE_SIZE
+}
+
+/**
+ * Report whether the parser recorded an unterminated heredoc error, which
+ * would mean the stream isn't finished and we should keep reading.
+ *
+ * Other lex modes can be detected by checking whether the lex mode is still
+ * open. Heredocs are different: on EOF the lex mode is closed and parsing
+ * resumes after the heredoc declaration to recover more of the tree, so the
+ * only trace left behind is the PM_ERR_HEREDOC_TERM diagnostic.
+ */
+static bool
+pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
+    pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head;
+
+    while (error != NULL) {
+        if (error->diag_id == PM_ERR_HEREDOC_TERM) return true;
+        error = (pm_diagnostic_t *) error->node.next;
+    }
+
+    return false;
+}
+
+/**
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * Prism expects the whole source to be in memory, but Ruby can be fed through
+ * stdin, so this accumulates the stream into the given buffer and parses the
+ * buffer contents. The buffer is initialized here and the parser points into
+ * it. Whenever a parse ends with errors while a lex mode is still open (or a
+ * heredoc was left unterminated) and the stream has more to give, the tree is
+ * discarded, more input is read, and the parse is redone from scratch.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t *
+pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options) {
+    pm_buffer_init(buffer);
+
+    // First pass: read what we can and parse it. The reader returns false
+    // only when it stopped at an __END__ marker.
+    bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
+    pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
+    pm_node_t *node = pm_parse(parser);
+
+    for (;;) {
+        // Nothing more can be read.
+        if (eof) break;
+
+        // The parse was clean, so there is no reason to retry.
+        if (!(parser->error_list.size > 0)) break;
+
+        // The errors are not the kind that more input could resolve.
+        if (!(parser->lex_modes.index > 0) && !pm_parse_stream_unterminated_heredoc_p(parser)) break;
+
+        // Throw away the previous attempt, read more, and parse again.
+        pm_node_destroy(parser, node);
+        eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
+
+        pm_parser_free(parser);
+        pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
+        node = pm_parse(parser);
+    }
+
+    return node;
+}
+
+/**
+ * Parse the source and report whether the parser's error list stayed empty.
+ *
+ * The options are deserialized from data, and the resulting tree is discarded
+ * as soon as parsing finishes. Only the error list decides the result; the
+ * warning list is not consulted.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
+    pm_parser_t parser;
+    pm_options_t options = { 0 };
+
+    pm_options_read(&options, data);
+    pm_parser_init(&parser, source, size, &options);
+
+    // The tree itself is of no interest here, only the diagnostics.
+    pm_node_destroy(&parser, pm_parse(&parser));
+
+    // Capture the verdict before the parser (and its error list) is freed.
+    const bool error_free = (parser.error_list.size == 0);
+
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+
+    return error_free;
+}
+
+#undef PM_CASE_KEYWORD
+#undef PM_CASE_OPERATOR
+#undef PM_CASE_WRITABLE
+#undef PM_STRING_EMPTY
+
+// We optionally support serializing to a binary string. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_SERIALIZATION define.
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+/**
+ * Append the serialization header to the given buffer: the five bytes "PRISM",
+ * then one byte each for the major, minor, and patch version, then a single
+ * byte that is 1 if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS is set and 0
+ * otherwise.
+ */
+static inline void
+pm_serialize_header(pm_buffer_t *buffer) {
+    pm_buffer_append_string(buffer, "PRISM", 5);
+    pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
+    pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
+    pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
+    pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
+}
+
+/**
+ * Serialize the AST represented by the given node to the given buffer.
+ *
+ * The output is the header, followed by the serialized content, followed by a
+ * single terminating NUL byte. Output is appended to the buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
+    pm_serialize_header(buffer);
+    pm_serialize_content(parser, node, buffer);
+    pm_buffer_append_byte(buffer, '\0');
+}
+
+/**
+ * Parse the given source and append the serialized AST to the given buffer.
+ *
+ * The options are deserialized from data. The parser, the tree, and the
+ * options are all temporary and are released before returning.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
+    pm_parser_t parser;
+    pm_options_t options = { 0 };
+
+    pm_options_read(&options, data);
+    pm_parser_init(&parser, source, size, &options);
+
+    pm_node_t *root = pm_parse(&parser);
+
+    // Header, content, and the trailing NUL byte.
+    pm_serialize(&parser, root, buffer);
+
+    pm_node_destroy(&parser, root);
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+}
+
+/**
+ * Parse the source read out of the given stream and append the serialized AST
+ * to the given buffer.
+ *
+ * The source is accumulated in a temporary buffer owned by this function. That
+ * buffer, the parser, the tree, and the options are released before returning.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data) {
+    pm_options_t options = { 0 };
+    pm_parser_t parser;
+    pm_buffer_t source_buffer;
+
+    pm_options_read(&options, data);
+
+    pm_node_t *root = pm_parse_stream(&parser, &source_buffer, stream, stream_fgets, stream_feof, &options);
+
+    // Header, content, and the trailing NUL byte.
+    pm_serialize(&parser, root, buffer);
+
+    pm_node_destroy(&parser, root);
+    pm_buffer_free(&source_buffer);
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+}
+
+/**
+ * Parse and serialize the comments in the given source to the given buffer.
+ *
+ * The output is the header, the encoding, the parser's start line as a
+ * variable-length signed integer, and then the comment list. Unlike
+ * pm_serialize(), no trailing NUL byte is appended. The options are
+ * deserialized from data and released before returning.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
+    pm_options_t options = { 0 };
+    pm_options_read(&options, data);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, source, size, &options);
+
+    // The tree is only needed so that the comment list gets populated; it is
+    // destroyed below without being serialized.
+    pm_node_t *node = pm_parse(&parser);
+    pm_serialize_header(buffer);
+    pm_serialize_encoding(parser.encoding, buffer);
+    pm_buffer_append_varsint(buffer, parser.start_line);
+    pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
+
+    pm_node_destroy(&parser, node);
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+}
+
+#endif
+
+/******************************************************************************/
+/* Slice queries for the Ruby API                                             */
+/******************************************************************************/
+
+/**
+ * The category of slice returned from pm_slice_type.
+ *
+ * The error value is pinned to -1, so the remaining values count up from 0 in
+ * declaration order.
+ */
+typedef enum {
+    /** Returned when the given encoding name is invalid. */
+    PM_SLICE_TYPE_ERROR = -1,
+
+    /** Returned when no other types apply to the slice. */
+    PM_SLICE_TYPE_NONE,
+
+    /** Returned when the slice is a valid local variable name. */
+    PM_SLICE_TYPE_LOCAL,
+
+    /** Returned when the slice is a valid constant name. */
+    PM_SLICE_TYPE_CONSTANT,
+
+    /**
+     * Returned when the slice is a valid method name, i.e. an identifier
+     * followed by a single trailing !, ?, or =.
+     */
+    PM_SLICE_TYPE_METHOD_NAME
+} pm_slice_type_t;
+
+/**
+ * Classify the slice as a local variable name, a constant name, a method name
+ * (an identifier with a trailing !, ?, or =), or none of those.
+ *
+ * Returns PM_SLICE_TYPE_ERROR if the encoding name is not recognized and
+ * PM_SLICE_TYPE_NONE if the slice is empty or is not a valid identifier. Only
+ * the bytes in [source, source + length) are examined; the slice does not need
+ * to be NUL-terminated.
+ */
+pm_slice_type_t
+pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
+    // first, get the right encoding object
+    const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
+    if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
+
+    // check that there is at least one character
+    if (length == 0) return PM_SLICE_TYPE_NONE;
+
+    size_t width;
+    if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
+        // valid because alphabetical
+    } else if (*source == '_') {
+        // valid because underscore
+        width = 1;
+    } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
+        // valid because multibyte
+    } else {
+        // invalid because no match
+        return PM_SLICE_TYPE_NONE;
+    }
+
+    // determine the type of the slice based on the first character
+    const uint8_t *end = source + length;
+    pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
+
+    // next, iterate through all of the bytes of the string to ensure that they
+    // are all valid identifier characters
+    source += width;
+
+    while (source < end) {
+        if ((width = encoding->alnum_char(source, end - source)) != 0) {
+            // valid because alphanumeric
+            source += width;
+        } else if (*source == '_') {
+            // valid because underscore
+            source++;
+        } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
+            // valid because multibyte
+            source += width;
+        } else {
+            // invalid because no match
+            break;
+        }
+    }
+
+    // accept a single trailing !, ?, or = as a method name suffix. The bounds
+    // check is required: when the identifier already fills the whole slice,
+    // source == end and the byte at that position is not part of the slice.
+    // Reading it would be out of bounds, and if it happened to be one of these
+    // characters the cursor would step past end and a valid identifier would
+    // be reported as PM_SLICE_TYPE_NONE.
+    if (source < end && (*source == '!' || *source == '?' || *source == '=')) {
+        source++;
+        result = PM_SLICE_TYPE_METHOD_NAME;
+    }
+
+    // valid if we are at the end of the slice
+    return source == end ? result : PM_SLICE_TYPE_NONE;
+}
+
+/**
+ * Determine whether the slice is a valid local variable name.
+ *
+ * Yields PM_STRING_QUERY_ERROR when the encoding name is not recognized,
+ * PM_STRING_QUERY_TRUE when the slice classifies as a local, and
+ * PM_STRING_QUERY_FALSE for every other classification.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+
+    switch (type) {
+        case PM_SLICE_TYPE_LOCAL:
+            return PM_STRING_QUERY_TRUE;
+        case PM_SLICE_TYPE_ERROR:
+            return PM_STRING_QUERY_ERROR;
+        case PM_SLICE_TYPE_CONSTANT:
+        case PM_SLICE_TYPE_METHOD_NAME:
+        case PM_SLICE_TYPE_NONE:
+            return PM_STRING_QUERY_FALSE;
+    }
+
+    // Every enumerator is handled above.
+    assert(false && "unreachable");
+    return PM_STRING_QUERY_FALSE;
+}
+
+/**
+ * Determine whether the slice is a valid constant name.
+ *
+ * Yields PM_STRING_QUERY_ERROR when the encoding name is not recognized,
+ * PM_STRING_QUERY_TRUE when the slice classifies as a constant, and
+ * PM_STRING_QUERY_FALSE for every other classification.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+
+    switch (type) {
+        case PM_SLICE_TYPE_CONSTANT:
+            return PM_STRING_QUERY_TRUE;
+        case PM_SLICE_TYPE_ERROR:
+            return PM_STRING_QUERY_ERROR;
+        case PM_SLICE_TYPE_LOCAL:
+        case PM_SLICE_TYPE_METHOD_NAME:
+        case PM_SLICE_TYPE_NONE:
+            return PM_STRING_QUERY_FALSE;
+    }
+
+    // Every enumerator is handled above.
+    assert(false && "unreachable");
+    return PM_STRING_QUERY_FALSE;
+}
+
+/**
+ * Check that the slice is a valid method name.
+ *
+ * Identifier-shaped slices are classified through pm_slice_type. Anything it
+ * does not recognize is then compared against fixed tables of one-, two-, and
+ * three-byte operator method names.
+ *
+ * Returns PM_STRING_QUERY_ERROR if the encoding name is not recognized,
+ * otherwise PM_STRING_QUERY_TRUE or PM_STRING_QUERY_FALSE.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t
+pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
+// B converts a boolean into a query result. C1/C2/C3 compare the first 1, 2,
+// or 3 bytes of the slice; each is only used below once length is known to be
+// exactly that many bytes.
+#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
+#define C1(c) (*source == c)
+#define C2(s) (memcmp(source, s, 2) == 0)
+#define C3(s) (memcmp(source, s, 3) == 0)
+
+    switch (pm_slice_type(source, length, encoding_name)) {
+        case PM_SLICE_TYPE_ERROR:
+            return PM_STRING_QUERY_ERROR;
+        case PM_SLICE_TYPE_NONE:
+            // not an identifier; fall out of the switch to the operator tables
+            break;
+        case PM_SLICE_TYPE_LOCAL:
+            // numbered parameters are not valid method names
+            //
+            // The expression is false only for a two-byte slice made of an
+            // underscore followed by a decimal digit other than 0.
+            return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
+        case PM_SLICE_TYPE_CONSTANT:
+            // all constants are valid method names
+            // (intentional fallthrough)
+        case PM_SLICE_TYPE_METHOD_NAME:
+            // all method names are valid method names
+            return PM_STRING_QUERY_TRUE;
+    }
+
+    // Only reached for PM_SLICE_TYPE_NONE. An empty slice lands in the default
+    // case.
+    switch (length) {
+        case 1:
+            return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
+        case 2:
+            return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
+        case 3:
+            return B(C3("===") || C3("<=>") || C3("[]="));
+        default:
+            return PM_STRING_QUERY_FALSE;
+    }
+
+// Every path above returns, but these are preprocessor directives and still
+// take effect for the rest of the file.
+#undef B
+#undef C1
+#undef C2
+#undef C3
+}
diff --git a/prism/prism.h b/prism/prism.h
new file mode 100644
index 0000000000..c468db18be
--- /dev/null
+++ b/prism/prism.h
@@ -0,0 +1,408 @@
+/**
+ * @file prism.h
+ *
+ * The main header file for the prism parser.
+ */
+#ifndef PRISM_H
+#define PRISM_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_buffer.h"
+#include "prism/util/pm_char.h"
+#include "prism/util/pm_integer.h"
+#include "prism/util/pm_memchr.h"
+#include "prism/util/pm_strncasecmp.h"
+#include "prism/util/pm_strpbrk.h"
+#include "prism/ast.h"
+#include "prism/diagnostic.h"
+#include "prism/node.h"
+#include "prism/options.h"
+#include "prism/pack.h"
+#include "prism/parser.h"
+#include "prism/prettyprint.h"
+#include "prism/regexp.h"
+#include "prism/static_literals.h"
+#include "prism/version.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <locale.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef _WIN32
+#include <strings.h>
+#endif
+
+/**
+ * The prism version and the serialization format.
+ *
+ * @returns The prism version as a constant string.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_version(void);
+
+/**
+ * Initialize a parser with the given start and end pointers.
+ *
+ * The resulting parser must eventually be freed with `pm_parser_free()`.
+ *
+ * @param parser The parser to initialize.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param options The optional options to use when parsing. These options must
+ *   live for the whole lifetime of this parser.
+ *
+ * \public \memberof pm_parser
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
+
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ *
+ * \public \memberof pm_parser
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
+
+/**
+ * Free any memory associated with the given parser.
+ *
+ * This does not free the `pm_options_t` object that was used to initialize the
+ * parser.
+ *
+ * @param parser The parser to free.
+ *
+ * \public \memberof pm_parser
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
+
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ *
+ * @param parser The parser to use.
+ * @return The AST representing the source.
+ *
+ * \public \memberof pm_parser
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
+
+/**
+ * This function is used in pm_parse_stream() to retrieve a line of input from a
+ * stream. It closely mirrors that of fgets so that fgets can be used as the
+ * default implementation.
+ */
+typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
+
+/**
+ * This function is used in pm_parse_stream to check whether a stream is EOF.
+ * It closely mirrors that of feof so that feof can be used as the
+ * default implementation.
+ */
+typedef int (pm_parse_stream_feof_t)(void *stream);
+
+/**
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * @param parser The parser to use.
+ * @param buffer The buffer to use.
+ * @param stream The stream to parse.
+ * @param stream_fgets The function to use to read from the stream.
+ * @param stream_feof The function to use to determine if the stream has hit eof.
+ * @param options The optional options to use when parsing.
+ * @return The AST representing the source.
+ *
+ * \public \memberof pm_parser
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options);
+
+// We optionally support serializing to a binary string. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_SERIALIZATION define.
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+/**
+ * Parse and serialize the AST represented by the source that is read out of the
+ * given stream into the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param stream The stream to parse.
+ * @param stream_fgets The function to use to read from the stream.
+ * @param stream_feof The function to use to tell if the stream has hit eof.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const char *data);
+
+/**
+ * Serialize the given list of comments to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param list The list of comments to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
+
+/**
+ * Serialize the name of the encoding to the buffer.
+ *
+ * @param encoding The encoding to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer);
+
+/**
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
+
+/**
+ * Serialize the AST represented by the given node to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
+
+/**
+ * Parse the given source to the AST and dump the AST to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
+
+/**
+ * Parse and serialize the comments in the given source to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
+
+/**
+ * Lex the given source and serialize to the given buffer.
+ *
+ * @param source The source to lex.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param data The optional data to pass to the lexer.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
+
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
+
+#endif
+
+/**
+ * Parse the source and return true if it parses without errors.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ * @return True if the source parses without errors.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data);
+
+/**
+ * Returns a string representation of the given token type.
+ *
+ * @param token_type The token type to convert to a string.
+ * @return A string representation of the given token type.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type);
+
+/**
+ * Returns the human name of the given token type.
+ *
+ * @param token_type The token type to convert to a human name.
+ * @return The human name of the given token type.
+ */
+const char * pm_token_type_human(pm_token_type_t token_type);
+
+// We optionally support dumping to JSON. For systems that don't want or need
+// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+#ifndef PRISM_EXCLUDE_JSON
+
+/**
+ * Dump JSON to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param parser The parser that parsed the node.
+ * @param node The node to serialize.
+ */
+PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
+
+#endif
+
+/**
+ * Represents the results of a slice query.
+ */
+typedef enum {
+    /** Returned if the encoding given to a slice query was invalid. */
+    PM_STRING_QUERY_ERROR = -1,
+
+    /** Returned if the result of the slice query is false (implicitly 0). */
+    PM_STRING_QUERY_FALSE,
+
+    /** Returned if the result of the slice query is true (implicitly 1). */
+    PM_STRING_QUERY_TRUE
+} pm_string_query_t;
+
+/**
+ * Check that the slice is a valid local variable name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name);
+
+/**
+ * Check that the slice is a valid constant name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name);
+
+/**
+ * Check that the slice is a valid method name.
+ *
+ * @param source The source to check.
+ * @param length The length of the source.
+ * @param encoding_name The name of the encoding of the source.
+ * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ *   the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name);
+
+/**
+ * @mainpage
+ *
+ * Prism is a parser for the Ruby programming language. It is designed to be
+ * portable, error tolerant, and maintainable. It is written in C99 and has no
+ * dependencies. It is currently being integrated into
+ * [CRuby](https://github.com/ruby/ruby),
+ * [JRuby](https://github.com/jruby/jruby),
+ * [TruffleRuby](https://github.com/truffleruby/truffleruby),
+ * [Sorbet](https://github.com/sorbet/sorbet), and
+ * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
+ *
+ * @section getting-started Getting started
+ *
+ * If you're vendoring this project and compiling it statically then as long as
+ * you have a C99 compiler you will be fine. If you're linking against it as
+ * a shared library, then you should compile with `-fvisibility=hidden` and
+ * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
+ * visible.
+ *
+ * @section parsing Parsing
+ *
+ * In order to parse Ruby code, the structures and functions that you're going
+ * to want to use and be aware of are:
+ *
+ * * `pm_parser_t` - the main parser structure
+ * * `pm_parser_init()` - initialize a parser
+ * * `pm_parse()` - parse and return the root node
+ * * `pm_node_destroy()` - deallocate the root node returned by `pm_parse()`
+ * * `pm_parser_free()` - free the internal memory of the parser
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void parse(const uint8_t *source, size_t length) {
+ *     pm_parser_t parser;
+ *     pm_parser_init(&parser, source, length, NULL);
+ *
+ *     pm_node_t *root = pm_parse(&parser);
+ *     printf("PARSED!\n");
+ *
+ *     pm_node_destroy(&parser, root);
+ *     pm_parser_free(&parser);
+ * }
+ * ```
+ *
+ * All of the nodes "inherit" from `pm_node_t` by embedding those structures
+ * as their first member. This means you can downcast and upcast any node in the
+ * tree to a `pm_node_t`.
+ *
+ * @section serializing Serializing
+ *
+ * Prism provides the ability to serialize the AST and its related metadata into
+ * a binary format. This format is designed to be portable to different
+ * languages and runtimes so that you only need to make one FFI call in order to
+ * parse Ruby code. The structures and functions that you're going to want to
+ * use and be aware of are:
+ *
+ * * `pm_buffer_t` - a small buffer object that will hold the serialized AST
+ * * `pm_buffer_free()` - free the memory associated with the buffer
+ * * `pm_serialize()` - serialize the AST into a buffer
+ * * `pm_serialize_parse()` - parse and serialize the AST into a buffer
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void serialize(const uint8_t *source, size_t length) {
+ *     pm_buffer_t buffer = { 0 };
+ *
+ *     pm_serialize_parse(&buffer, source, length, NULL);
+ *     printf("SERIALIZED!\n");
+ *
+ *     pm_buffer_free(&buffer);
+ * }
+ * ```
+ *
+ * @section inspecting Inspecting
+ *
+ * Prism provides the ability to inspect the AST by pretty-printing nodes. You
+ * can do this with the `pm_prettyprint()` function, which you would use like:
+ *
+ * ```c
+ * void prettyprint(const uint8_t *source, size_t length) {
+ *     pm_parser_t parser;
+ *     pm_parser_init(&parser, source, length, NULL);
+ *
+ *     pm_node_t *root = pm_parse(&parser);
+ *     pm_buffer_t buffer = { 0 };
+ *
+ *     pm_prettyprint(&buffer, &parser, root);
+ *     printf("%.*s\n", (int) buffer.length, buffer.value);
+ *
+ *     pm_buffer_free(&buffer);
+ *     pm_node_destroy(&parser, root);
+ *     pm_parser_free(&parser);
+ * }
+ * ```
+ */
+
+#endif
diff --git a/prism/regexp.c b/prism/regexp.c
new file mode 100644
index 0000000000..dcc7476244
--- /dev/null
+++ b/prism/regexp.c
@@ -0,0 +1,790 @@
+#include "prism/regexp.h"
+
+#define PM_REGEXP_PARSE_DEPTH_MAX 4096
+
+/**
+ * This is the parser that is going to handle parsing regular expressions.
+ */
+typedef struct {
+    /** The parser that is currently being used. */
+    pm_parser_t *parser;
+
+    /** A pointer to the first byte of the source that we are parsing. */
+    const uint8_t *start;
+
+    /** A pointer to the current position in the source. */
+    const uint8_t *cursor;
+
+    /** A pointer to one past the last byte of the source (exclusive end). */
+    const uint8_t *end;
+
+    /**
+     * Whether or not the regular expression currently being parsed is in
+     * extended mode, wherein whitespace is ignored and comments are allowed.
+     */
+    bool extended_mode;
+
+    /** Whether the encoding has changed from the default (UTF-8 is assumed otherwise). */
+    bool encoding_changed;
+
+    /** The encoding of the source. */
+    const pm_encoding_t *encoding;
+
+    /** The callback to call when a named capture group is found. May be NULL. */
+    pm_regexp_name_callback_t name_callback;
+
+    /** The data to pass to the name callback. */
+    void *name_data;
+
+    /** The callback to call when a parse error is found. Called without a NULL check. */
+    pm_regexp_error_callback_t error_callback;
+
+    /** The data to pass to the error callback. */
+    void *error_data;
+} pm_regexp_parser_t;
+
+/**
+ * Append an error to the parser.
+ */
+static inline void
+pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
+    parser->error_callback(start, end, message, parser->error_data);
+}
+
+/**
+ * Hand the name delimited by start and end to the name callback as a shared
+ * string. The caller must already have checked that the callback is not NULL.
+ */
+static void
+pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    pm_string_t name;
+    pm_string_shared_init(&name, start, end);
+    parser->name_callback(&name, parser->name_data);
+    pm_string_free(&name);
+}
+
+/**
+ * Returns true once the cursor has reached (or passed) the end of the source.
+ */
+static inline bool
+pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
+    return !(parser->cursor < parser->end);
+}
+
+/**
+ * Consume the next byte and return true only if it equals the given value.
+ */
+static inline bool
+pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
+    if (pm_regexp_char_is_eof(parser)) return false;
+    if (*parser->cursor != value) return false;
+
+    parser->cursor++;
+    return true;
+}
+
+/**
+ * Expect a character to be present and consume it. Returns false, leaving the
+ * cursor untouched, when the source is exhausted or the next byte differs.
+ */
+static inline bool
+pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
+    bool matched = !pm_regexp_char_is_eof(parser) && *parser->cursor == value;
+
+    if (matched) parser->cursor++;
+    return matched;
+}
+
+/**
+ * Move the cursor to just past the next occurrence of the given byte. Returns
+ * false, without moving the cursor, if no occurrence is found before the end.
+ */
+static bool
+pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
+    if (pm_regexp_char_is_eof(parser)) return false;
+
+    size_t remaining = (size_t) (parser->end - parser->cursor);
+    const uint8_t *match = (const uint8_t *) pm_memchr(parser->cursor, value, remaining, parser->encoding_changed, parser->encoding);
+
+    if (match != NULL) {
+        parser->cursor = match + 1;
+        return true;
+    }
+    return false;
+}
+
+/**
+ * Range quantifiers are a special class of quantifiers that look like
+ *
+ * * {digit}
+ * * {digit,}
+ * * {digit,digit}
+ * * {,digit}
+ *
+ * Unfortunately, if there are any spaces in between, then this just becomes a
+ * regular character match expression and we have to backtrack. So we record a
+ * "save" point on entry and attempt to parse the quantifier. If that fails
+ * (including by reaching the end of the source), we restore the save point.
+ * Either way this function returns true, since neither outcome is an error.
+ *
+ * To properly track everything, we're going to build a little state machine.
+ * It looks something like the following:
+ *
+ *                  +-------+                 +---------+ ------------+
+ * ---- lbrace ---> | start | ---- digit ---> | minimum |             |
+ *                  +-------+                 +---------+ <--- digit -+
+ *                      |                       |    |
+ *   +-------+          |                       |  rbrace
+ *   | comma | <----- comma  +---- comma -------+    |
+ *   +-------+               V                       V
+ *      |             +---------+               +---------+
+ *      +-- digit --> | maximum | -- rbrace --> || final ||
+ *                    +---------+               +---------+
+ *                    |         ^
+ *                    +- digit -+
+ *
+ * Note that by the time we've hit this function, the lbrace has already been
+ * consumed so we're in the start state.
+ */
+static bool
+pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
+    const uint8_t *savepoint = parser->cursor;
+
+    enum {
+        PM_REGEXP_RANGE_QUANTIFIER_STATE_START,
+        PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
+        PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
+        PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
+    } state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
+
+    while (1) {
+        if (parser->cursor >= parser->end) { // ran out of input before a closing brace
+            parser->cursor = savepoint;
+            return true;
+        }
+
+        switch (state) {
+            case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
+                switch (*parser->cursor) {
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                        parser->cursor++;
+                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
+                        break;
+                    case ',':
+                        parser->cursor++;
+                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
+                        break;
+                    default:
+                        parser->cursor = savepoint;
+                        return true;
+                }
+                break;
+            case PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
+                switch (*parser->cursor) {
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                        parser->cursor++;
+                        break;
+                    case ',':
+                        parser->cursor++;
+                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
+                        break;
+                    case '}':
+                        parser->cursor++;
+                        return true;
+                    default:
+                        parser->cursor = savepoint;
+                        return true;
+                }
+                break;
+            case PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
+                switch (*parser->cursor) {
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                        parser->cursor++;
+                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
+                        break;
+                    default:
+                        parser->cursor = savepoint;
+                        return true;
+                }
+                break;
+            case PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
+                switch (*parser->cursor) {
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                        parser->cursor++;
+                        break;
+                    case '}':
+                        parser->cursor++;
+                        return true;
+                    default:
+                        parser->cursor = savepoint;
+                        return true;
+                }
+                break;
+        }
+    }
+
+    return true; // not reached: the loop above only exits by returning
+}
+
+/**
+ * quantifier : star-quantifier
+ *            | plus-quantifier
+ *            | optional-quantifier
+ *            | range-quantifier
+ *            | <empty>
+ *            ;
+ */
+static bool
+pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
+    // Quantifiers may be stacked one after another, so keep consuming them
+    // until we either run out of input or find a byte that does not start a
+    // quantifier.
+    while (!pm_regexp_char_is_eof(parser)) {
+        const uint8_t byte = *parser->cursor;
+
+        if (byte == '*' || byte == '+' || byte == '?') {
+            parser->cursor++;
+        } else if (byte == '{') {
+            parser->cursor++;
+            if (!pm_regexp_parse_range_quantifier(parser)) return false;
+        } else {
+            // In this case there is no (further) quantifier.
+            return true;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
+ *                   ;
+ */
+static bool
+pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
+    if (!pm_regexp_char_expect(parser, ':')) return false;
+
+    // The class name may be negated with a leading caret.
+    pm_regexp_char_accept(parser, '^');
+
+    // Skip the class name up to and including the closing colon, then
+    // require the two closing brackets.
+    if (!pm_regexp_char_find(parser, ':')) return false;
+    if (!pm_regexp_char_expect(parser, ']')) return false;
+
+    return pm_regexp_char_expect(parser, ']');
+}
+
+// Forward declaration because character sets can be nested.
+static bool
+pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
+
+/**
+ * match-char-set : '[' '^'? (match-range | match-char)* ']'
+ *                ;
+ */
+static bool
+pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
+    // An optional leading caret negates the set.
+    pm_regexp_char_accept(parser, '^');
+
+    while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
+        const uint8_t byte = *parser->cursor;
+        parser->cursor++;
+
+        if (byte == '[') {
+            // Nested bracket expression. Its result is not checked here;
+            // scanning simply resumes from wherever the cursor was left.
+            pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
+        } else if (byte == '\\' && !pm_regexp_char_is_eof(parser)) {
+            // Skip the byte following the backslash so that an escaped ']'
+            // does not close the set.
+            parser->cursor++;
+        }
+    }
+
+    return pm_regexp_char_expect(parser, ']');
+}
+
+/**
+ * A left bracket can either mean a POSIX class or a character set.
+ */
+static bool
+pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth) {
+    if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
+        pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
+        return false;
+    }
+
+    // "[]" is reported as an error, but parsing continues after it.
+    if (pm_regexp_char_accept(parser, ']')) {
+        pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "empty char-class");
+        return true;
+    }
+
+    // Compare the remaining length rather than computing `cursor + 2`, which
+    // could form a pointer beyond one past the end of the source buffer.
+    const uint8_t *reset = parser->cursor;
+    if ((parser->end - reset > 2) && reset[0] == '[' && reset[1] == ':') {
+        parser->cursor++;
+        if (pm_regexp_parse_posix_class(parser)) return true;
+        parser->cursor = reset;
+    }
+
+    return pm_regexp_parse_character_set(parser, depth);
+}
+
+// Forward declaration here since parsing groups needs to go back up the grammar
+// to parse expressions within them.
+static bool
+pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth);
+
+/**
+ * These are the states of the options that are configurable on the regular
+ * expression (or from within a group).
+ */
+typedef enum {
+    PM_REGEXP_OPTION_STATE_INVALID, /**< Not a recognized option; can be neither added nor removed. */
+    PM_REGEXP_OPTION_STATE_TOGGLEABLE, /**< Untouched so far; may be added or removed (i, m, x). */
+    PM_REGEXP_OPTION_STATE_ADDABLE, /**< Untouched so far; may only be added (d, a, u). */
+    PM_REGEXP_OPTION_STATE_ADDED, /**< Has been turned on; may still be removed afterwards. */
+    PM_REGEXP_OPTION_STATE_REMOVED /**< Has been turned off; can no longer be added. */
+} pm_regexp_option_state_t;
+
+// These are the options that are configurable on the regular expression (or
+// from within a group).
+
+#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
+#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
+#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
+
+/**
+ * This is the set of options that are configurable on the regular expression.
+ */
+typedef struct {
+    /** The pm_regexp_option_state_t of each option, indexed by (letter - 'a'). */
+    uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
+} pm_regexp_options_t;
+
+/** Reset every option slot to its initial state: toggleable, addable, or invalid. */
+static void
+pm_regexp_options_init(pm_regexp_options_t *options) {
+    uint8_t *values = options->values;
+    memset(values, PM_REGEXP_OPTION_STATE_INVALID, sizeof(options->values));
+
+    for (const char *key = "imx"; *key != '\0'; key++) {
+        values[*key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
+    }
+    for (const char *key = "dau"; *key != '\0'; key++) {
+        values[*key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
+    }
+}
+
+/**
+ * Attempt to turn on the given option. Returns true if the option is now (or
+ * was already) added, false if it is unknown or was previously removed.
+ */
+static bool
+pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
+    if (key < PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM || key > PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
+        return false;
+    }
+
+    uint8_t *slot = &options->values[key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM];
+
+    switch (*slot) {
+        case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
+        case PM_REGEXP_OPTION_STATE_ADDABLE:
+            *slot = PM_REGEXP_OPTION_STATE_ADDED;
+            return true;
+        case PM_REGEXP_OPTION_STATE_ADDED:
+            return true;
+        default:
+            // Invalid or already removed options cannot be added.
+            return false;
+    }
+}
+
+/**
+ * Attempt to turn off the given option. Returns true if the option is now (or
+ * was already) removed, false if it is unknown or still in the add-only state.
+ */
+static bool
+pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
+    if (key < PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM || key > PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
+        return false;
+    }
+
+    uint8_t *slot = &options->values[key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM];
+
+    switch (*slot) {
+        case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
+        case PM_REGEXP_OPTION_STATE_ADDED:
+        case PM_REGEXP_OPTION_STATE_REMOVED:
+            *slot = PM_REGEXP_OPTION_STATE_REMOVED;
+            return true;
+        default:
+            // Invalid and add-only options cannot be removed.
+            return false;
+    }
+}
+
+/**
+ * Returns the current state of the given option, or the invalid state if the
+ * key falls outside of the tracked range of option characters.
+ */
+static uint8_t
+pm_regexp_options_state(pm_regexp_options_t *options, uint8_t key) {
+    if (key < PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM || key > PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
+        return PM_REGEXP_OPTION_STATE_INVALID;
+    }
+
+    return options->values[key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM];
+}
+
+/**
+ * Groups can have quite a few different patterns for syntax. They basically
+ * just wrap a set of expressions, but they can potentially have options after a
+ * question mark. If there _isn't_ a question mark, then it's just a set of
+ * expressions. If there _is_, then here are the options:
+ *
+ * * (?#...)                       - inline comments
+ * * (?:subexp)                    - non-capturing group
+ * * (?=subexp)                    - positive lookahead
+ * * (?!subexp)                    - negative lookahead
+ * * (?>subexp)                    - atomic group
+ * * (?~subexp)                    - absence operator
+ * * (?<=subexp)                   - positive lookbehind
+ * * (?<!subexp)                   - negative lookbehind
+ * * (?<name>subexp)               - named capturing group
+ * * (?'name'subexp)               - named capturing group
+ * * (?(cond)yes-subexp)           - conditional expression
+ * * (?(cond)yes-subexp|no-subexp) - conditional expression
+ * * (?imxdau-imx)                 - turn on and off configuration
+ * * (?imxdau-imx:subexp)          - turn on and off configuration for an expression
+ */
+static bool
+pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
+    const uint8_t *group_start = parser->cursor; // start of the range used for error locations
+
+    pm_regexp_options_t options;
+    pm_regexp_options_init(&options);
+
+    // First, parse any options for the group (everything introduced by a ?).
+    if (pm_regexp_char_accept(parser, '?')) {
+        if (pm_regexp_char_is_eof(parser)) {
+            pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
+            return false;
+        }
+
+        switch (*parser->cursor) {
+            case '#': { // inline comments
+                parser->cursor++;
+                if (pm_regexp_char_is_eof(parser)) {
+                    pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
+                    return false;
+                }
+
+                if (parser->encoding_changed && parser->encoding->multibyte) {
+                    bool escaped = false;
+
+                    // Here we're going to take a slow path and iterate through
+                    // each multibyte character to find the close paren. We do
+                    // this because \ can be a trailing byte in some encodings.
+                    while (parser->cursor < parser->end) {
+                        if (!escaped && *parser->cursor == ')') {
+                            parser->cursor++;
+                            return true;
+                        }
+
+                        size_t width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
+                        if (width == 0) return false; // presumably an invalid byte sequence — TODO confirm
+
+                        escaped = (width == 1) && (*parser->cursor == '\\');
+                        parser->cursor += width;
+                    }
+
+                    return false;
+                } else {
+                    // Here we can take the fast path and use memchr to find the
+                    // next ) because we are safe checking backward for \ since
+                    // it cannot be a trailing character.
+                    bool found = pm_regexp_char_find(parser, ')');
+
+                    while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
+                        found = pm_regexp_char_find(parser, ')');
+                    }
+
+                    return found;
+                }
+            }
+            case ':': // non-capturing group
+            case '=': // positive lookahead
+            case '!': // negative lookahead
+            case '>': // atomic group
+            case '~': // absence operator
+                parser->cursor++;
+                break;
+            case '<':
+                parser->cursor++;
+                if (pm_regexp_char_is_eof(parser)) {
+                    pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
+                    return false;
+                }
+
+                switch (*parser->cursor) {
+                    case '=': // positive lookbehind
+                    case '!': // negative lookbehind
+                        parser->cursor++;
+                        break;
+                    default: { // named capture group
+                        const uint8_t *start = parser->cursor;
+                        if (!pm_regexp_char_find(parser, '>')) {
+                            return false;
+                        }
+
+                        if (parser->cursor - start == 1) { // only the closing > was consumed
+                            pm_regexp_parse_error(parser, start, parser->cursor, "group name is empty");
+                        }
+
+                        if (parser->name_callback != NULL) {
+                            pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
+                        }
+
+                        break;
+                    }
+                }
+                break;
+            case '\'': { // named capture group
+                const uint8_t *start = ++parser->cursor;
+                if (!pm_regexp_char_find(parser, '\'')) {
+                    return false;
+                }
+
+                if (parser->name_callback != NULL) {
+                    pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
+                }
+
+                break;
+            }
+            case '(': // conditional expression
+                if (!pm_regexp_char_find(parser, ')')) {
+                    return false;
+                }
+                break;
+            case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
+                while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
+                    if (!pm_regexp_options_add(&options, *parser->cursor)) {
+                        return false;
+                    }
+                    parser->cursor++;
+                }
+
+                if (pm_regexp_char_is_eof(parser)) {
+                    return false;
+                }
+
+                // If we are at the end of the group of options and there is no
+                // subexpression, then we are going to be setting the options
+                // for the parent group. In this case we are safe to return now.
+                if (*parser->cursor == ')') {
+                    if (pm_regexp_options_state(&options, 'x') == PM_REGEXP_OPTION_STATE_ADDED) {
+                        parser->extended_mode = true;
+                    }
+
+                    parser->cursor++;
+                    return true;
+                }
+
+                // If we hit a :, we're done parsing options; on a - fall through to removals.
+                if (*parser->cursor != '-') break;
+
+                PRISM_FALLTHROUGH
+            case '-':
+                parser->cursor++;
+                while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
+                    if (!pm_regexp_options_remove(&options, *parser->cursor)) {
+                        return false;
+                    }
+                    parser->cursor++;
+                }
+
+                if (pm_regexp_char_is_eof(parser)) {
+                    return false;
+                }
+
+                // If we are at the end of the group of options and there is no
+                // subexpression, then we are going to be setting the options
+                // for the parent group. In this case we are safe to return now.
+                if (*parser->cursor == ')') {
+                    switch (pm_regexp_options_state(&options, 'x')) {
+                        case PM_REGEXP_OPTION_STATE_ADDED:
+                            parser->extended_mode = true;
+                            break;
+                        case PM_REGEXP_OPTION_STATE_REMOVED:
+                            parser->extended_mode = false;
+                            break;
+                    }
+
+                    parser->cursor++;
+                    return true;
+                }
+
+                break;
+            default:
+                parser->cursor++;
+                pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "undefined group option");
+                break;
+        }
+    }
+
+    bool extended_mode = parser->extended_mode; // saved so it can be restored when the group ends
+    switch (pm_regexp_options_state(&options, 'x')) {
+        case PM_REGEXP_OPTION_STATE_ADDED:
+            parser->extended_mode = true;
+            break;
+        case PM_REGEXP_OPTION_STATE_REMOVED:
+            parser->extended_mode = false;
+            break;
+    }
+
+    // Now, parse the expressions within this group.
+    while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
+        if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
+            parser->extended_mode = extended_mode;
+            return false;
+        }
+        pm_regexp_char_accept(parser, '|');
+    }
+
+    // Finally, restore the outer extended mode and expect a closing parenthesis.
+    parser->extended_mode = extended_mode;
+    if (pm_regexp_char_expect(parser, ')')) return true;
+
+    pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
+    return false;
+}
+
+/**
+ * item : anchor
+ *      | match-posix-class
+ *      | match-char-set
+ *      | match-char-class
+ *      | match-char-prop
+ *      | match-char
+ *      | match-any
+ *      | group
+ *      | quantified
+ *      ;
+ */
+static bool
+pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
+    const uint8_t byte = *parser->cursor;
+    switch (byte) {
+        case '^':
+        case '$': // anchors
+            parser->cursor++;
+            return pm_regexp_parse_quantifier(parser);
+        case '\\': // escape: skip the backslash and the byte after it, if any
+            parser->cursor++;
+            if (!pm_regexp_char_is_eof(parser)) parser->cursor++;
+            return pm_regexp_parse_quantifier(parser);
+        case '(':
+            parser->cursor++;
+            if (!pm_regexp_parse_group(parser, depth)) return false;
+            return pm_regexp_parse_quantifier(parser);
+        case '[':
+            parser->cursor++;
+            if (!pm_regexp_parse_lbracket(parser, depth)) return false;
+            return pm_regexp_parse_quantifier(parser);
+        case '*':
+        case '?':
+        case '+':
+        case ')': {
+            const char *message = (byte == ')') ? "unmatched close parenthesis" : "target of repeat operator is not specified";
+            parser->cursor++;
+            pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, message);
+            return true;
+        }
+        default:
+            break;
+    }
+
+    if (byte == '#' && parser->extended_mode) {
+        // In extended mode a # starts a comment that runs through the newline.
+        if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
+        return true;
+    }
+
+    size_t width;
+    if (parser->encoding_changed) {
+        width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
+    } else {
+        width = pm_encoding_utf_8_char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
+    }
+
+    if (width == 0) return false; // TODO: add appropriate error
+    parser->cursor += width;
+    return pm_regexp_parse_quantifier(parser);
+}
+
+/**
+ * expression : item+
+ *            ;
+ */
+static bool
+pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth) {
+    if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
+        pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
+        return false;
+    }
+
+    // At least one item is required. After that, keep consuming items until
+    // we reach the end of the source or a byte that terminates the
+    // expression: a close parenthesis or an alternation bar.
+    do {
+        if (!pm_regexp_parse_item(parser, depth)) {
+            return false;
+        }
+        if (pm_regexp_char_is_eof(parser)) break;
+    } while (*parser->cursor != ')' && *parser->cursor != '|');
+
+    return true;
+}
+
+/**
+ * pattern : EOF
+ *         | expression EOF
+ *         | expression '|' pattern
+ *         ;
+ */
+static bool
+pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
+    for (;;) {
+        if (pm_regexp_char_is_eof(parser)) return true;
+        if (!pm_regexp_parse_expression(parser, 0)) return false;
+        if (!pm_regexp_char_accept(parser, '|')) break;
+    }
+    return pm_regexp_char_is_eof(parser);
+}
+
+/**
+ * Parse a regular expression and extract the names of all of the named capture
+ * groups.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
+    pm_regexp_parser_t regexp = {
+        .parser = parser,
+        .start = source,
+        .cursor = source,
+        .end = source + size,
+        .extended_mode = extended_mode,
+        .encoding_changed = parser->encoding_changed,
+        .encoding = parser->encoding,
+        .name_callback = name_callback, .name_data = name_data,
+        .error_callback = error_callback, .error_data = error_data
+    };
+
+    pm_regexp_parse_pattern(&regexp);
+}
diff --git a/prism/regexp.h b/prism/regexp.h
new file mode 100644
index 0000000000..5366b5a5a0
--- /dev/null
+++ b/prism/regexp.h
@@ -0,0 +1,43 @@
+/**
+ * @file regexp.h
+ *
+ * A regular expression parser.
+ */
+#ifndef PRISM_REGEXP_H
+#define PRISM_REGEXP_H
+
+#include "prism/defines.h"
+#include "prism/parser.h"
+#include "prism/encoding.h"
+#include "prism/util/pm_memchr.h"
+#include "prism/util/pm_string.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+/**
+ * This callback is called by pm_regexp_parse() when a named capture group is found.
+ */
+typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
+
+/**
+ * This callback is called by pm_regexp_parse() when a parse error is found.
+ */
+typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
+
+/**
+ * Parse a regular expression. Nothing is returned; results are delivered only through the two callbacks.
+ *
+ * @param parser The parser that is currently being used. Its encoding settings are read for the parse.
+ * @param source The start of the regular expression source to parse.
+ * @param size The size of the source in bytes.
+ * @param extended_mode Whether to parse the regular expression in extended mode.
+ * @param name_callback The optional callback to call when a named capture group is found.
+ * @param name_data The optional data to pass to the name callback.
+ * @param error_callback The callback to call when a parse error is found.
+ * @param error_data The data to pass to the error callback.
+ */
+PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
+
+#endif
diff --git a/prism/srcs.mk b/prism/srcs.mk
new file mode 100644
index 0000000000..022662a00b
--- /dev/null
+++ b/prism/srcs.mk
@@ -0,0 +1,150 @@
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs uncommon.mk: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+	touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+		$(PRISM_SRCDIR)/templates/template.rb \
+		$(PRISM_SRCDIR)/srcs.mk.in
+	$(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/srcs.mk $(PRISM_SRCDIR)/srcs.mk.in
+
+distclean-prism-srcs::
+	$(RM) prism/.srcs.mk.time
+	$(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+	$(RM) $(PRISM_SRCDIR)/srcs.mk
+
+realclean-srcs-local:: realclean-prism-srcs
+
+main srcs: $(srcdir)/prism/api_node.c
+$(srcdir)/prism/api_node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/ext/prism/api_node.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) ext/prism/api_node.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/api_node.c
+
+main incs: $(srcdir)/prism/ast.h
+$(srcdir)/prism/ast.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/ast.h.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/ast.h $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/ast.h
+
+main incs: $(srcdir)/prism/diagnostic.h
+$(srcdir)/prism/diagnostic.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/diagnostic.h.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/diagnostic.h $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/diagnostic.h
+
+main srcs: $(srcdir)/lib/prism/compiler.rb
+$(srcdir)/lib/prism/compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/compiler.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/compiler.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/compiler.rb
+
+main srcs: $(srcdir)/lib/prism/dispatcher.rb
+$(srcdir)/lib/prism/dispatcher.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dispatcher.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dispatcher.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/dispatcher.rb
+
+main srcs: $(srcdir)/lib/prism/dot_visitor.rb
+$(srcdir)/lib/prism/dot_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dot_visitor.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dot_visitor.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/dot_visitor.rb
+
+main srcs: $(srcdir)/lib/prism/dsl.rb
+$(srcdir)/lib/prism/dsl.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dsl.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dsl.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/dsl.rb
+
+main srcs: $(srcdir)/lib/prism/inspect_visitor.rb
+$(srcdir)/lib/prism/inspect_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/inspect_visitor.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/inspect_visitor.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/inspect_visitor.rb
+
+main srcs: $(srcdir)/lib/prism/mutation_compiler.rb
+$(srcdir)/lib/prism/mutation_compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/mutation_compiler.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/mutation_compiler.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/mutation_compiler.rb
+
+main srcs: $(srcdir)/lib/prism/node.rb
+$(srcdir)/lib/prism/node.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/node.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/node.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/node.rb
+
+main srcs: $(srcdir)/lib/prism/reflection.rb
+$(srcdir)/lib/prism/reflection.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/reflection.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/reflection.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/reflection.rb
+
+main srcs: $(srcdir)/lib/prism/serialize.rb
+$(srcdir)/lib/prism/serialize.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/serialize.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/serialize.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/serialize.rb
+
+main srcs: $(srcdir)/lib/prism/visitor.rb
+$(srcdir)/lib/prism/visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/visitor.rb.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/visitor.rb $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/lib/prism/visitor.rb
+
+main srcs: $(srcdir)/prism/diagnostic.c
+$(srcdir)/prism/diagnostic.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/diagnostic.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/diagnostic.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/diagnostic.c
+
+main srcs: $(srcdir)/prism/node.c
+$(srcdir)/prism/node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/node.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/node.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/node.c
+
+main srcs: $(srcdir)/prism/prettyprint.c
+$(srcdir)/prism/prettyprint.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/prettyprint.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/prettyprint.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/prettyprint.c
+
+main srcs: $(srcdir)/prism/serialize.c
+$(srcdir)/prism/serialize.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/serialize.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/serialize.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/serialize.c
+
+main srcs: $(srcdir)/prism/token_type.c
+$(srcdir)/prism/token_type.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/token_type.c.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/token_type.c $@
+
+realclean-prism-srcs::
+	$(RM) $(srcdir)/prism/token_type.c
diff --git a/prism/srcs.mk.in b/prism/srcs.mk.in
new file mode 100644
index 0000000000..cc263fd1b4
--- /dev/null
+++ b/prism/srcs.mk.in
@@ -0,0 +1,48 @@
+<% # -*- ruby -*-
+require_relative 'templates/template'
+
+script = File.basename(__FILE__)
+srcs = output ? File.basename(output) : script.chomp('.in')
+mk = 'uncommon.mk'
+
+# %>
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs <%=%><%=mk%>: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+	touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+		$(PRISM_SRCDIR)/templates/template.rb \
+		$(PRISM_SRCDIR)/<%=%><%=script%>
+	$(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/<%=%><%=srcs%> $(PRISM_SRCDIR)/<%=%><%=script%>
+
+distclean-prism-srcs::
+	$(RM) prism/.srcs.mk.time
+	$(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+	$(RM) $(PRISM_SRCDIR)/<%=%><%=srcs%>
+
+realclean-srcs-local:: realclean-prism-srcs
+<% Prism::Template::TEMPLATES.map do |t|
+  /\.(?:[ch]|rb)\z/ =~ t or next # only templates ending in .c, .h or .rb get a rule
+  s = '$(srcdir)/' + t.sub(%r[\A(?:(src)|ext|include)/]) {$1 && 'prism/'} # a leading src/ becomes prism/; a leading ext/ or include/ is dropped
+  s.sub!(%r[\A\$(srcdir)/prism/], '$(PRISM_SRCDIR)/') # NOTE(review): the parentheses are unescaped, so they form a capture group and this cannot match a literal "$(srcdir)"; the substitution looks like a no-op - confirm whether escaping them was intended
+  target = s.end_with?('.h') ? 'incs' : 'srcs' # headers are attached to "incs", everything else to "srcs"
+# %>
+
+main <%=%><%=target%>: <%=%><%=s%>
+<%=%><%=s%>: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/<%=%><%=t%>.erb
+	$(Q) $(BASERUBY) $(PRISM_TEMPLATE) <%=%><%=t%> $@
+
+realclean-prism-srcs::
+	$(RM) <%=%><%=s%>
+<%
+end
+# %>
diff --git a/prism/static_literals.c b/prism/static_literals.c
new file mode 100644
index 0000000000..9fa37b999a
--- /dev/null
+++ b/prism/static_literals.c
@@ -0,0 +1,617 @@
+#include "prism/static_literals.h"
+
+/**
+ * A small struct used for passing around a subset of the information that is
+ * stored on the parser. We use this to avoid having static literals explicitly
+ * depend on the parser struct.
+ */
+typedef struct {
+    /** The list of newline offsets to use to calculate line numbers. */
+    const pm_newline_list_t *newline_list;
+
+    /** The line number that the parser starts on. Passed along with newline_list when computing a line number. */
+    int32_t start_line;
+
+    /** The name of the encoding that the parser is using. Only read when inspecting a source encoding node; pm_static_literals_add passes NULL. */
+    const char *encoding_name;
+} pm_static_literals_metadata_t;
+
+static inline uint32_t
+murmur_scramble(uint32_t value) {
+    // Multiply, rotate left by 15 bits, then multiply again.
+    uint32_t mixed = value * 0xcc9e2d51;
+    mixed = (mixed << 15) | (mixed >> 17);
+    return mixed * 0x1b873593;
+}
+
+/**
+ * A 32-bit Murmur hash (https://en.wikipedia.org/wiki/MurmurHash) of the given
+ * bytes, starting from a fixed initial state. It is non-cryptographic; it is
+ * used here because it is fast.
+ */
+static uint32_t
+murmur_hash(const uint8_t *key, size_t length) {
+    uint32_t state = 0x9747b28c;
+    const size_t words = length >> 2;
+
+    // Mix in every complete four-byte word.
+    for (size_t word_index = 0; word_index < words; word_index++) {
+        uint32_t word;
+        memcpy(&word, key + word_index * sizeof(uint32_t), sizeof(uint32_t));
+        state ^= murmur_scramble(word);
+        state = (state << 13) | (state >> 19);
+        state = state * 5 + 0xe6546b64;
+    }
+
+    // Fold the zero to three leftover bytes into one word, last byte first.
+    uint32_t remainder = 0;
+    for (size_t count = length & 3; count > 0; count--) {
+        remainder = (remainder << 8) | key[(words << 2) + count - 1];
+    }
+
+    state ^= murmur_scramble(remainder);
+    state ^= (uint32_t) length;
+    state ^= state >> 16;
+    state *= 0x85ebca6b;
+    state ^= state >> 13;
+    state *= 0xc2b2ae35;
+    return state ^ (state >> 16);
+}
+
+/**
+ * Hash an integer. The words in `values` are hashed when that array is present,
+ * otherwise the single `value` word is hashed. Negative integers get an extra
+ * term mixed in.
+ */
+static uint32_t
+integer_hash(const pm_integer_t *integer) {
+    const uint8_t *bytes = (const uint8_t *) &integer->value;
+    size_t size = sizeof(uint32_t);
+
+    if (integer->values) {
+        bytes = (const uint8_t *) integer->values;
+        size = sizeof(uint32_t) * integer->length;
+    }
+
+    const uint32_t magnitude_hash = murmur_hash(bytes, size);
+    return integer->negative ? (magnitude_hash ^ murmur_scramble((uint32_t) 1)) : magnitude_hash;
+}
+
+/**
+ * Return the hash of the given node. It is important that nodes that have
+ * equivalent static literal values have the same hash. This is because we use
+ * these hashes to look for duplicates. Only the node types handled below are supported.
+ */
+static uint32_t
+node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE: {
+            // Integers hash their value.
+            const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
+            return integer_hash(&cast->value);
+        }
+        case PM_SOURCE_LINE_NODE: {
+            // Source lines hash their line number as a four-byte value.
+            const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
+            const int32_t *value = &line_column.line;
+            return murmur_hash((const uint8_t *) value, sizeof(int32_t));
+        }
+        case PM_FLOAT_NODE: {
+            // Floats hash the bytes of their double value.
+            const double *value = &((const pm_float_node_t *) node)->value;
+            return murmur_hash((const uint8_t *) value, sizeof(double));
+        }
+        case PM_RATIONAL_NODE: {
+            // Rationals hash their numerator and denominator, and mix in their node type.
+            const pm_rational_node_t *cast = (const pm_rational_node_t *) node;
+            return integer_hash(&cast->numerator) ^ integer_hash(&cast->denominator) ^ murmur_scramble((uint32_t) cast->base.type);
+        }
+        case PM_IMAGINARY_NODE: {
+            // Imaginaries hash their numeric value. Because their numeric value
+            // is stored as a subnode, we hash that node and then mix in the
+            // fact that this is an imaginary node.
+            const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
+            return node_hash(metadata, numeric) ^ murmur_scramble((uint32_t) node->type);
+        }
+        case PM_STRING_NODE: {
+            // Strings hash their value and mix in only their forced-encoding
+            // flags so that different encodings are not considered equal.
+            const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped;
+
+            pm_node_flags_t flags = node->flags;
+            flags &= (PM_STRING_FLAGS_FORCED_BINARY_ENCODING | PM_STRING_FLAGS_FORCED_UTF8_ENCODING);
+
+            return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) flags);
+        }
+        case PM_SOURCE_FILE_NODE: {
+            // Source files hash only their filepath. Unlike strings, no flags
+            // are mixed in here.
+            const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath;
+            return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t));
+        }
+        case PM_REGULAR_EXPRESSION_NODE: {
+            // Regular expressions hash their value and mix in all of the
+            // node's flags (not only the encoding-related ones).
+            const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped;
+            return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
+        }
+        case PM_SYMBOL_NODE: {
+            // Symbols hash their value and mix in all of the node's flags
+            // (not only the encoding-related ones).
+            const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped;
+            return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags);
+        }
+        default:
+            assert(false && "unreachable");
+            return 0;
+    }
+}
+
+/**
+ * Insert a node into the node hash. It accepts the hash that should hold the
+ * new node, the metadata used to hash and compare nodes, the node to insert,
+ * whether an equal node that is already present should be replaced, and a
+ * comparison function that must handle every node type stored in this hash.
+ * Returns the equal node that was already present, or NULL if there was none.
+ */
+static pm_node_t *
+pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *metadata, pm_node_t *node, bool replace, int (*compare)(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right)) {
+    // If we are out of space, we need to resize the hash. This will cause all
+    // of the nodes to be rehashed and reinserted into the new hash.
+    if (hash->size * 2 >= hash->capacity) {
+        // First, allocate space for the new node list. If that fails, nothing is inserted and NULL is returned.
+        uint32_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2;
+        pm_node_t **new_nodes = xcalloc(new_capacity, sizeof(pm_node_t *));
+        if (new_nodes == NULL) return NULL;
+
+        // Capacities are always powers of two, so masking the hash value with
+        // a bitwise AND gives the same result as the modulo operator while
+        // being more efficient.
+        uint32_t mask = new_capacity - 1;
+
+        // Now, rehash all of the nodes into the new list. Two nodes can land on
+        // the same slot, so probe for a free one instead of overwriting.
+        for (uint32_t old_index = 0; old_index < hash->capacity; old_index++) {
+            pm_node_t *existing = hash->nodes[old_index];
+            if (existing == NULL) continue;
+
+            uint32_t new_index = node_hash(metadata, existing) & mask;
+            while (new_nodes[new_index] != NULL) new_index = (new_index + 1) & mask;
+            new_nodes[new_index] = existing;
+        }
+
+        // Finally, free the old node list and update the hash.
+        xfree(hash->nodes);
+        hash->nodes = new_nodes;
+        hash->capacity = new_capacity;
+    }
+
+    // Now, insert the node into the hash.
+    uint32_t mask = hash->capacity - 1;
+    uint32_t index = node_hash(metadata, node) & mask;
+
+    // We use linear probing to resolve collisions. This means that if the
+    // current index is occupied, we will move to the next index and try again.
+    // We are guaranteed that this will eventually find an empty slot because we
+    // resize the hash when it gets too full.
+    while (hash->nodes[index] != NULL) {
+        if (compare(metadata, hash->nodes[index], node) == 0) break;
+        index = (index + 1) & mask;
+    }
+
+    // If the current index is occupied, we need to return the node that was
+    // already in the hash. Otherwise, we can just increment the size and insert
+    // the new node.
+    pm_node_t *result = hash->nodes[index];
+
+    if (result == NULL) {
+        hash->size++;
+        hash->nodes[index] = node;
+    } else if (replace) {
+        hash->nodes[index] = node;
+    }
+
+    return result;
+}
+
+/** Release the node array owned by the given node hash, if one was allocated. */
+static void
+pm_node_hash_free(pm_node_hash_t *hash) {
+    if (hash->capacity != 0) {
+        xfree(hash->nodes);
+    }
+}
+
+/**
+ * Compare two numerically comparable values, yielding -1, 0, or 1. Each argument may be evaluated twice.
+ */
+#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : ((left) > (right)) ? 1 : 0)
+
+/**
+ * Return the value of an IntegerNode or SourceLineNode as an int64_t. Integers
+ * that use the `values` array are clamped to INT64_MIN or INT64_MAX.
+ */
+static int64_t
+pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_SOURCE_LINE_NODE:
+            return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
+        case PM_INTEGER_NODE: {
+            const pm_integer_node_t *cast = (const pm_integer_node_t *) node;
+            const bool negative = cast->value.negative;
+            if (cast->value.values) return negative ? INT64_MIN : INT64_MAX;
+            return negative ? -((int64_t) cast->value.value) : (int64_t) cast->value.value;
+        }
+        default:
+            assert(false && "unreachable");
+            return 0;
+    }
+}
+
+/**
+ * A comparison function for comparing two IntegerNode or SourceLineNode
+ * instances.
+ */
+static int
+pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    // Two IntegerNode instances are compared directly on their integer values.
+    if (!PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) && !PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
+        return pm_integer_compare(&((const pm_integer_node_t *) left)->value, &((const pm_integer_node_t *) right)->value);
+    }
+
+    // Otherwise a SourceLineNode is involved, so compare through int64_t.
+    const int64_t left_line = pm_int64_value(metadata, left);
+    const int64_t right_line = pm_int64_value(metadata, right);
+    return PM_NUMERIC_COMPARISON(left_line, right_line);
+}
+
+/**
+ * A comparison function for comparing two FloatNode instances by value.
+ */
+static int
+pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    const pm_float_node_t *left_float = (const pm_float_node_t *) left;
+    const pm_float_node_t *right_float = (const pm_float_node_t *) right;
+    return PM_NUMERIC_COMPARISON(left_float->value, right_float->value);
+}
+
+/**
+ * A comparison function for comparing two nodes that have attached numbers.
+ */
+static int
+pm_compare_number_nodes(const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    // Nodes of different types are ordered by their node type.
+    if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
+        return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
+    }
+
+    switch (PM_NODE_TYPE(left)) {
+        case PM_FLOAT_NODE:
+            return pm_compare_float_nodes(metadata, left, right);
+        case PM_INTEGER_NODE:
+            return pm_compare_integer_nodes(metadata, left, right);
+        case PM_RATIONAL_NODE: {
+            const pm_rational_node_t *lhs = (const pm_rational_node_t *) left;
+            const pm_rational_node_t *rhs = (const pm_rational_node_t *) right;
+
+            // Denominators decide first; numerators only break ties.
+            const int denominators = pm_integer_compare(&lhs->denominator, &rhs->denominator);
+            return denominators != 0 ? denominators : pm_integer_compare(&lhs->numerator, &rhs->numerator);
+        }
+        case PM_IMAGINARY_NODE:
+            return pm_compare_number_nodes(metadata, ((const pm_imaginary_node_t *) left)->numeric, ((const pm_imaginary_node_t *) right)->numeric);
+        default:
+            assert(false && "unreachable");
+            return 0;
+    }
+}
+
+/**
+ * Return a pointer to the string attached to a StringNode, SourceFileNode, or SymbolNode.
+ */
+static const pm_string_t *
+pm_string_value(const pm_node_t *node) {
+    const pm_string_t *value = NULL;
+    switch (PM_NODE_TYPE(node)) {
+        case PM_SYMBOL_NODE: value = &((const pm_symbol_node_t *) node)->unescaped; break;
+        case PM_STRING_NODE: value = &((const pm_string_node_t *) node)->unescaped; break;
+        case PM_SOURCE_FILE_NODE: value = &((const pm_source_file_node_t *) node)->filepath; break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+
+    return value;
+}
+
+/**
+ * A comparison function for comparing two nodes that have attached strings.
+ */
+static int
+pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    // pm_string_value picks the string field that matches each node's type,
+    // so nodes of different types can be compared against each other.
+    return pm_string_compare(pm_string_value(left), pm_string_value(right));
+}
+
+/**
+ * A comparison function for comparing two RegularExpressionNode instances.
+ */
+static int
+pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+    const pm_string_t *left_source = &((const pm_regular_expression_node_t *) left)->unescaped;
+    const pm_string_t *right_source = &((const pm_regular_expression_node_t *) right)->unescaped;
+
+    // The unescaped sources decide first; the node flags only break ties.
+    const int sources = pm_string_compare(left_source, right_source);
+    if (sources == 0) return PM_NUMERIC_COMPARISON(left->flags, right->flags);
+    return sources;
+}
+
+#undef PM_NUMERIC_COMPARISON
+
+/**
+ * Add a node to the set of static literals. Returns the equal node that was already in the set (the new node takes its place when replace is true), or NULL if there was none or the node type is not tracked.
+ */
+pm_node_t *
+pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE:
+        case PM_SOURCE_LINE_NODE:
+            return pm_node_hash_insert(
+                &literals->integer_nodes,
+                &(pm_static_literals_metadata_t) {
+                    .newline_list = newline_list,
+                    .start_line = start_line,
+                    .encoding_name = NULL
+                },
+                node,
+                replace,
+                pm_compare_integer_nodes
+            );
+        case PM_FLOAT_NODE:
+            return pm_node_hash_insert(
+                &literals->float_nodes,
+                &(pm_static_literals_metadata_t) {
+                    .newline_list = newline_list,
+                    .start_line = start_line,
+                    .encoding_name = NULL
+                },
+                node,
+                replace,
+                pm_compare_float_nodes
+            );
+        case PM_RATIONAL_NODE:
+        case PM_IMAGINARY_NODE:
+            return pm_node_hash_insert(
+                &literals->number_nodes,
+                &(pm_static_literals_metadata_t) {
+                    .newline_list = newline_list,
+                    .start_line = start_line,
+                    .encoding_name = NULL
+                },
+                node,
+                replace,
+                pm_compare_number_nodes
+            );
+        case PM_STRING_NODE:
+        case PM_SOURCE_FILE_NODE:
+            return pm_node_hash_insert(
+                &literals->string_nodes,
+                &(pm_static_literals_metadata_t) {
+                    .newline_list = newline_list,
+                    .start_line = start_line,
+                    .encoding_name = NULL
+                },
+                node,
+                replace,
+                pm_compare_string_nodes
+            );
+        case PM_REGULAR_EXPRESSION_NODE:
+            return pm_node_hash_insert(
+                &literals->regexp_nodes,
+                &(pm_static_literals_metadata_t) {
+                    .newline_list = newline_list,
+                    .start_line = start_line,
+                    .encoding_name = NULL
+                },
+                node,
+                replace,
+                pm_compare_regular_expression_nodes
+            );
+        case PM_SYMBOL_NODE:
+            return pm_node_hash_insert(
+                &literals->symbol_nodes,
+                &(pm_static_literals_metadata_t) {
+                    .newline_list = newline_list,
+                    .start_line = start_line,
+                    .encoding_name = NULL
+                },
+                node,
+                replace,
+                pm_compare_string_nodes
+            );
+        case PM_TRUE_NODE: {
+            pm_node_t *duplicated = literals->true_node;
+            if ((duplicated == NULL) || replace) literals->true_node = node;
+            return duplicated;
+        }
+        case PM_FALSE_NODE: {
+            pm_node_t *duplicated = literals->false_node;
+            if ((duplicated == NULL) || replace) literals->false_node = node;
+            return duplicated;
+        }
+        case PM_NIL_NODE: {
+            pm_node_t *duplicated = literals->nil_node;
+            if ((duplicated == NULL) || replace) literals->nil_node = node;
+            return duplicated;
+        }
+        case PM_SOURCE_ENCODING_NODE: {
+            pm_node_t *duplicated = literals->source_encoding_node;
+            if ((duplicated == NULL) || replace) literals->source_encoding_node = node;
+            return duplicated;
+        }
+        default:
+            return NULL;
+    }
+}
+
+/**
+ * Release the memory owned by every node hash in the given static literals set.
+ */
+void
+pm_static_literals_free(pm_static_literals_t *literals) {
+    pm_node_hash_t *const hashes[] = {
+        &literals->integer_nodes, &literals->float_nodes, &literals->number_nodes,
+        &literals->string_nodes, &literals->regexp_nodes, &literals->symbol_nodes
+    };
+
+    for (size_t index = 0; index < sizeof(hashes) / sizeof(hashes[0]); index++) pm_node_hash_free(hashes[index]);
+}
+
+/** Whether a '+' must be written before this numeric when formatting an imaginary node, i.e. whether it is not negative. */
+static bool
+pm_static_literal_positive_p(const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FLOAT_NODE: {
+            // Zero is not > 0, so fall back to how the literal is spelled: 0.0 counts as positive, -0.0 does not.
+            const double value = ((const pm_float_node_t *) node)->value;
+            return value > 0 || (value == 0.0 && *node->location.start != '-');
+        }
+        case PM_INTEGER_NODE:
+            return !((const pm_integer_node_t *) node)->value.negative;
+        case PM_RATIONAL_NODE:
+            return !((const pm_rational_node_t *) node)->numerator.negative;
+        case PM_IMAGINARY_NODE:
+            return pm_static_literal_positive_p(((const pm_imaginary_node_t *) node)->numeric);
+        default:
+            assert(false && "unreachable");
+            return false;
+    }
+}
+
+/**
+ * Append a string-based representation of the given static literal to the buffer. Only the node types handled below are supported.
+ */
+static inline void
+pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        case PM_FALSE_NODE:
+            pm_buffer_append_string(buffer, "false", 5);
+            break;
+        case PM_FLOAT_NODE: {
+            const double value = ((const pm_float_node_t *) node)->value;
+
+            if (PRISM_ISINF(value)) {
+                if (*node->location.start == '-') {
+                    pm_buffer_append_byte(buffer, '-');
+                }
+                pm_buffer_append_string(buffer, "Infinity", 8);
+            } else if (value == 0.0) {
+                if (*node->location.start == '-') {
+                    pm_buffer_append_byte(buffer, '-');
+                }
+                pm_buffer_append_string(buffer, "0.0", 3);
+            } else {
+                pm_buffer_append_format(buffer, "%g", value);
+
+                // %g will not insert a .0 for 1e100 (we'll get back 1e+100). So
+                // we check for the decimal point and insert ".0" before the 'e' (or at the end) if it's not present.
+                // NOTE(review): pm_buffer_index is not given a start offset, so this presumably also matches a '.' or 'e' appended to the buffer earlier - confirm against callers.
+                if (pm_buffer_index(buffer, '.') == SIZE_MAX) {
+                    size_t exponent_index = pm_buffer_index(buffer, 'e');
+                    size_t index = exponent_index == SIZE_MAX ? pm_buffer_length(buffer) : exponent_index;
+                    pm_buffer_insert(buffer, index, ".0", 2);
+                }
+            }
+
+            break;
+        }
+        case PM_IMAGINARY_NODE: {
+            const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric;
+            pm_buffer_append_string(buffer, "(0", 2);
+            if (pm_static_literal_positive_p(numeric)) pm_buffer_append_byte(buffer, '+');
+            pm_static_literal_inspect_node(buffer, metadata, numeric);
+            if (PM_NODE_TYPE_P(numeric, PM_RATIONAL_NODE)) {
+                pm_buffer_append_byte(buffer, '*');
+            }
+            pm_buffer_append_string(buffer, "i)", 2);
+            break;
+        }
+        case PM_INTEGER_NODE:
+            pm_integer_string(buffer, &((const pm_integer_node_t *) node)->value);
+            break;
+        case PM_NIL_NODE:
+            pm_buffer_append_string(buffer, "nil", 3);
+            break;
+        case PM_RATIONAL_NODE: {
+            const pm_rational_node_t *rational = (const pm_rational_node_t *) node;
+            pm_buffer_append_byte(buffer, '(');
+            pm_integer_string(buffer, &rational->numerator);
+            pm_buffer_append_byte(buffer, '/');
+            pm_integer_string(buffer, &rational->denominator);
+            pm_buffer_append_byte(buffer, ')');
+            break;
+        }
+        case PM_REGULAR_EXPRESSION_NODE: {
+            const pm_string_t *unescaped = &((const pm_regular_expression_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, '/');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '/');
+
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE)) pm_buffer_append_string(buffer, "m", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE)) pm_buffer_append_string(buffer, "i", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)) pm_buffer_append_string(buffer, "x", 1);
+            if (PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT)) pm_buffer_append_string(buffer, "n", 1);
+
+            break;
+        }
+        case PM_SOURCE_ENCODING_NODE:
+            pm_buffer_append_format(buffer, "#<Encoding:%s>", metadata->encoding_name);
+            break;
+        case PM_SOURCE_FILE_NODE: {
+            const pm_string_t *filepath = &((const pm_source_file_node_t *) node)->filepath;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(filepath), pm_string_length(filepath), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '"');
+            break;
+        }
+        case PM_SOURCE_LINE_NODE:
+            pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
+            break;
+        case PM_STRING_NODE: {
+            const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            pm_buffer_append_byte(buffer, '"');
+            break;
+        }
+        case PM_SYMBOL_NODE: {
+            const pm_string_t *unescaped = &((const pm_symbol_node_t *) node)->unescaped;
+            pm_buffer_append_byte(buffer, ':');
+            pm_buffer_append_source(buffer, pm_string_source(unescaped), pm_string_length(unescaped), PM_BUFFER_ESCAPING_RUBY);
+            break;
+        }
+        case PM_TRUE_NODE:
+            pm_buffer_append_string(buffer, "true", 4);
+            break;
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+}
+
+/**
+ * Append a string-based representation of the given static literal to the
+ * buffer.
+ */
+void
+pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
+    const pm_static_literals_metadata_t metadata = {
+        .newline_list = newline_list,
+        .start_line = start_line,
+        .encoding_name = encoding_name
+    };
+
+    // The metadata only needs to live for the duration of this call.
+    pm_static_literal_inspect_node(buffer, &metadata, node);
+}
diff --git a/prism/static_literals.h b/prism/static_literals.h
new file mode 100644
index 0000000000..bd29761899
--- /dev/null
+++ b/prism/static_literals.h
@@ -0,0 +1,121 @@
+/**
+ * @file static_literals.h
+ *
+ * A set of static literal nodes that can be checked for duplicates.
+ */
+#ifndef PRISM_STATIC_LITERALS_H
+#define PRISM_STATIC_LITERALS_H
+
+#include "prism/defines.h"
+#include "prism/ast.h"
+#include "prism/util/pm_newline_list.h"
+
+#include <assert.h>
+#include <stdbool.h>
+
+/**
+ * An internal hash table for a set of nodes.
+ */
+typedef struct {
+    /** The array of nodes in the hash table. */
+    pm_node_t **nodes;
+
+    /** The size of the hash table. */
+    uint32_t size;
+
+    /** The space that has been allocated in the hash table. */
+    uint32_t capacity;
+} pm_node_hash_t;
+
+/**
+ * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
+ * to alert the user of potential issues. To do this, we keep a set of the nodes
+ * that have been seen so far, and compare whenever we find a new node.
+ *
+ * We bucket the nodes based on their type to minimize the number of comparisons
+ * that need to be performed.
+ */
+typedef struct {
+    /**
+     * This is the set of IntegerNode and SourceLineNode instances.
+     */
+    pm_node_hash_t integer_nodes;
+
+    /**
+     * This is the set of FloatNode instances.
+     */
+    pm_node_hash_t float_nodes;
+
+    /**
+     * This is the set of RationalNode and ImaginaryNode instances.
+     */
+    pm_node_hash_t number_nodes;
+
+    /**
+     * This is the set of StringNode and SourceFileNode instances.
+     */
+    pm_node_hash_t string_nodes;
+
+    /**
+     * This is the set of RegularExpressionNode instances.
+     */
+    pm_node_hash_t regexp_nodes;
+
+    /**
+     * This is the set of SymbolNode instances.
+     */
+    pm_node_hash_t symbol_nodes;
+
+    /**
+     * A pointer to the last TrueNode instance that was inserted, or NULL.
+     */
+    pm_node_t *true_node;
+
+    /**
+     * A pointer to the last FalseNode instance that was inserted, or NULL.
+     */
+    pm_node_t *false_node;
+
+    /**
+     * A pointer to the last NilNode instance that was inserted, or NULL.
+     */
+    pm_node_t *nil_node;
+
+    /**
+     * A pointer to the last SourceEncodingNode instance that was inserted, or
+     * NULL.
+     */
+    pm_node_t *source_encoding_node;
+} pm_static_literals_t;
+
+/**
+ * Add a node to the set of static literals.
+ *
+ * @param newline_list The list of newline offsets to use to calculate lines.
+ * @param start_line The line number that the parser starts on.
+ * @param literals The set of static literals to add the node to.
+ * @param node The node to add to the set.
+ * @param replace Whether to replace the previous node if one already exists.
+ * @return A pointer to the node that is being overwritten, if there is one.
+ */
+pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
+
+/**
+ * Free the internal memory associated with the given static literals set.
+ *
+ * @param literals The set of static literals to free.
+ */
+void pm_static_literals_free(pm_static_literals_t *literals);
+
+/**
+ * Create a string-based representation of the given static literal.
+ *
+ * @param buffer The buffer to write the string to.
+ * @param newline_list The list of newline offsets to use to calculate lines.
+ * @param start_line The line number that the parser starts on.
+ * @param encoding_name The name of the encoding of the source being parsed.
+ * @param node The node to create a string representation of.
+ */
+void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
+
+#endif
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
new file mode 100644
index 0000000000..23af8886a7
--- /dev/null
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -0,0 +1,282 @@
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+#include "prism/extension.h"
+
+extern VALUE rb_cPrism;
+extern VALUE rb_cPrismNode;
+extern VALUE rb_cPrismSource;
+extern VALUE rb_cPrismToken;
+extern VALUE rb_cPrismLocation;
+
+<%- nodes.each do |node| -%>
+static VALUE rb_cPrism<%= node.name %>;
+<%- end -%>
+
+static VALUE
+pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) {
+    if (freeze) { // frozen results get an eagerly built, frozen rb_cPrismLocation instance
+        VALUE location_argv[] = {
+            source,
+            LONG2FIX(start - parser->start), // byte offset from the start of the source
+            LONG2FIX(end - start) // length in bytes
+        };
+
+        return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation));
+    } else { // otherwise pack the offset (high 32 bits) and length (low 32 bits) into one Integer
+        uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
+        return ULL2NUM(value);
+    }
+}
+
+VALUE
+pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
+    ID type = rb_intern(pm_token_type_name(token->type)); // the token type's name as a Ruby ID
+    VALUE location = pm_location_new(parser, token->start, token->end, source, freeze);
+    // The slice is the token's raw source bytes, tagged with the given encoding.
+    VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding);
+    if (freeze) rb_obj_freeze(slice);
+    // Instantiate rb_cPrismToken with (source, type symbol, slice, location).
+    VALUE argv[] = { source, ID2SYM(type), slice, location };
+    VALUE value = rb_class_new_instance(4, argv, rb_cPrismToken);
+    if (freeze) rb_obj_freeze(value);
+
+    return value;
+}
+
+static VALUE
+pm_string_new(const pm_string_t *string, rb_encoding *encoding) { // always returns a frozen Ruby string in `encoding`
+    return rb_obj_freeze(rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding));
+}
+
+VALUE
+pm_integer_new(const pm_integer_t *integer) {
+    VALUE result;
+    if (integer->values == NULL) { // no word array: the magnitude is the single inline `value`
+        result = UINT2NUM(integer->value);
+    } else { // otherwise render every 32-bit word as hex text and let Ruby parse it
+        VALUE string = rb_str_new(NULL, integer->length * 8);
+        unsigned char *bytes = (unsigned char *) RSTRING_PTR(string);
+        // Each word yields 8 hex digits; the buffer is filled from its end backwards.
+        size_t offset = integer->length * 8;
+        for (size_t value_index = 0; value_index < integer->length; value_index++) {
+            uint32_t value = integer->values[value_index];
+            // Emit nibbles lowest-first, so values[0] lands in the rightmost digits.
+            for (int index = 0; index < 8; index++) {
+                int byte = (value >> (4 * index)) & 0xf;
+                bytes[--offset] = byte < 10 ? byte + '0' : byte - 10 + 'a';
+            }
+        }
+        // Parse the hex digits by calling String#to_i with base 16.
+        result = rb_funcall(string, rb_intern("to_i"), 1, UINT2NUM(16));
+    }
+    // Apply the sign last by calling unary minus on the magnitude.
+    if (integer->negative) {
+        result = rb_funcall(result, rb_intern("-@"), 0);
+    }
+
+    return result;
+}
+
+// Create a Prism::Source object from the given parser, after pm_parse() was called.
+VALUE
+pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
+    VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
+    // Copy the parser's newline offsets into a Ruby array.
+    VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
+    for (size_t index = 0; index < parser->newline_list.size; index++) {
+        rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index]));
+    }
+    // When freezing, freeze the arguments before they are handed to Ruby.
+    if (freeze) {
+        rb_obj_freeze(source_string);
+        rb_obj_freeze(offsets);
+    }
+    // Construction is delegated to the `for` method of rb_cPrismSource.
+    VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets);
+    if (freeze) rb_obj_freeze(source);
+
+    return source;
+}
+
+typedef struct pm_node_stack_node {
+    struct pm_node_stack_node *prev; // the frame beneath this one, or NULL at the bottom of the stack
+    const pm_node_t *visit; // the node this frame refers to; NULL stands for an absent child
+    bool visited; // false when pushed; set to true when the node's children are queued
+} pm_node_stack_node_t;
+
+static void
+pm_node_stack_push(pm_node_stack_node_t **stack, const pm_node_t *visit) {
+    // Allocate a fresh, not-yet-visited frame linked to the current top.
+    pm_node_stack_node_t *frame = xmalloc(sizeof(*frame));
+    *frame = (pm_node_stack_node_t) { .prev = *stack, .visit = visit, .visited = false };
+
+    // The new frame becomes the top of the stack.
+    *stack = frame;
+}
+
+static const pm_node_t *
+pm_node_stack_pop(pm_node_stack_node_t **stack) {
+    // Detach the top frame; the caller must ensure the stack is non-empty.
+    pm_node_stack_node_t *top = *stack;
+    *stack = top->prev;
+    // Save the payload before the frame itself is released.
+    const pm_node_t *popped = top->visit;
+    xfree(top);
+    return popped;
+}
+
+VALUE
+pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
+    VALUE constants = rb_ary_new_capa(parser->constant_pool.size);
+    // Intern every constant-pool entry up front; node fields refer to them by 1-based id.
+    for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
+        pm_constant_t *constant = &parser->constant_pool.constants[index];
+        int state = 0;
+
+        VALUE string = rb_enc_str_new((const char *) constant->start, constant->length, encoding);
+        VALUE value = rb_protect(rb_str_intern, string, &state);
+        // A non-zero state means interning raised: substitute :"?" and clear the error.
+        if (state != 0) {
+            value = ID2SYM(rb_intern_const("?"));
+            rb_set_errinfo(Qnil);
+        }
+
+        rb_ary_push(constants, value);
+    }
+    // Walk the tree with an explicit stack; built Ruby objects accumulate on value_stack.
+    pm_node_stack_node_t *node_stack = NULL;
+    pm_node_stack_push(&node_stack, node);
+    VALUE value_stack = rb_ary_new();
+
+    while (node_stack != NULL) {
+        if (!node_stack->visited) { // first encounter: queue the node's children
+            if (node_stack->visit == NULL) { // an absent child is represented by nil
+                pm_node_stack_pop(&node_stack);
+                rb_ary_push(value_stack, Qnil);
+                continue;
+            }
+
+            const pm_node_t *node = node_stack->visit;
+            node_stack->visited = true;
+
+            switch (PM_NODE_TYPE(node)) {
+                <%- nodes.each do |node| -%>
+                <%- if node.fields.any? { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                case <%= node.type %>: {
+                    pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+                    <%- node.fields.each do |field| -%>
+                    <%- case field -%>
+                    <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
+                    pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>);
+                    <%- when Prism::Template::NodeListField -%>
+                    for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+                        pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
+                    }
+                    <%- end -%>
+                    <%- end -%>
+                    break;
+                }
+                <%- end -%>
+                <%- end -%>
+                default:
+                    break;
+            }
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+        } else { // second encounter: the children are built, so construct this node
+            const pm_node_t *node = pm_node_stack_pop(&node_stack);
+
+            switch (PM_NODE_TYPE(node)) {
+                <%- nodes.each do |node| -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                case <%= node.type %>: {
+                    <%- if node.fields.any? { |field| ![Prism::Template::NodeField, Prism::Template::OptionalNodeField].include?(field.class) } -%>
+                    pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+                    <%- end -%>
+                    VALUE argv[<%= node.fields.length + 4 %>];
+
+                    // source
+                    argv[0] = source;
+
+                    // node_id
+                    argv[1] = ULONG2NUM(node->node_id);
+
+                    // location
+                    argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze);
+
+                    // flags
+                    argv[3] = ULONG2NUM(node->flags);
+                    <%- node.fields.each.with_index(4) do |field, index| -%>
+
+                    // <%= field.name %>
+                    <%- case field -%>
+                    <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = rb_ary_pop(value_stack);
+                    <%- when Prism::Template::NodeListField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
+                    for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+                        rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
+                    }
+                    if (freeze) rb_obj_freeze(argv[<%= index %>]);
+                    <%- when Prism::Template::StringField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = pm_string_new(&cast-><%= field.name %>, encoding);
+                    <%- when Prism::Template::ConstantField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    assert(cast-><%= field.name %> != 0);
+                    argv[<%= index %>] = RARRAY_AREF(constants, cast-><%= field.name %> - 1);
+                    <%- when Prism::Template::OptionalConstantField -%>
+                    argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : RARRAY_AREF(constants, cast-><%= field.name %> - 1);
+                    <%- when Prism::Template::ConstantListField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
+                    for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+                        assert(cast-><%= field.name %>.ids[index] != 0);
+                        rb_ary_push(argv[<%= index %>], RARRAY_AREF(constants, cast-><%= field.name %>.ids[index] - 1));
+                    }
+                    if (freeze) rb_obj_freeze(argv[<%= index %>]);
+                    <%- when Prism::Template::LocationField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+                    <%- when Prism::Template::OptionalLocationField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+                    <%- when Prism::Template::UInt8Field -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
+                    <%- when Prism::Template::UInt32Field -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = ULONG2NUM(cast-><%= field.name %>);
+                    <%- when Prism::Template::IntegerField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = pm_integer_new(&cast-><%= field.name %>);
+                    <%- when Prism::Template::DoubleField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+                    argv[<%= index %>] = DBL2NUM(cast-><%= field.name %>);
+                    <%- else -%>
+                    <%- raise -%>
+                    <%- end -%>
+                    <%- end -%>
+
+                    VALUE value = rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>);
+                    if (freeze) rb_obj_freeze(value);
+
+                    rb_ary_push(value_stack, value);
+                    break;
+                }
+                <%- end -%>
+                default:
+                    rb_raise(rb_eRuntimeError, "unknown node type: %d", PM_NODE_TYPE(node));
+            }
+        }
+    }
+    // The root frame is popped last, so the last value pushed is the root's Ruby object.
+    return rb_ary_pop(value_stack);
+}
+
+void
+Init_prism_api_node(void) { // defines one Ruby class per node type under rb_cPrism, each a subclass of rb_cPrismNode
+    <%- nodes.each do |node| -%>
+    rb_cPrism<%= node.name %> = rb_define_class_under(rb_cPrism, "<%= node.name %>", rb_cPrismNode);
+    <%- end -%>
+}
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
new file mode 100644
index 0000000000..790cf9ebb8
--- /dev/null
+++ b/prism/templates/include/prism/ast.h.erb
@@ -0,0 +1,238 @@
+/**
+ * @file ast.h
+ *
+ * The abstract syntax tree.
+ *
+ * --
+ */
+#ifndef PRISM_AST_H
+#define PRISM_AST_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_constant_pool.h"
+#include "prism/util/pm_integer.h"
+#include "prism/util/pm_string.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * This enum represents every type of token in the Ruby source.
+ */
+typedef enum pm_token_type {
+<%- tokens.each do |token| -%>
+    /** <%= token.comment %> */
+    PM_TOKEN_<%= token.name %><%= " = #{token.value}" if token.value %>,
+
+<%- end -%>
+    /** The maximum token value. */
+    PM_TOKEN_MAXIMUM,
+} pm_token_type_t;
+
+/**
+ * This struct represents a token in the Ruby source. We use it to track both
+ * type and location information.
+ */
+typedef struct {
+    /** The type of the token. */
+    pm_token_type_t type;
+
+    /** A pointer to the start location of the token in the source. */
+    const uint8_t *start;
+
+    /** A pointer to the end location of the token in the source. */
+    const uint8_t *end;
+} pm_token_t;
+
+/**
+ * This represents a range of bytes in the source string to which a node or
+ * token corresponds.
+ */
+typedef struct {
+    /** A pointer to the start location of the range in the source. */
+    const uint8_t *start;
+
+    /** A pointer to the end location of the range in the source. */
+    const uint8_t *end;
+} pm_location_t;
+
+struct pm_node;
+
+/**
+ * A list of nodes in the source, most often used for lists of children.
+ */
+typedef struct pm_node_list {
+    /** The number of nodes in the list. */
+    size_t size;
+
+    /** The capacity of the list that has been allocated. */
+    size_t capacity;
+
+    /** The nodes in the list. */
+    struct pm_node **nodes;
+} pm_node_list_t;
+
+/**
+ * This enum represents every type of node in the Ruby syntax tree.
+ */
+enum pm_node_type {
+<%- nodes.each_with_index do |node, index| -%>
+    /** <%= node.name %> */
+    <%= node.type %> = <%= index + 1 %>,
+
+<%- end -%>
+    /** A special kind of node used for compilation. */
+    PM_SCOPE_NODE
+};
+
+/**
+ * This is the type of node embedded in the node struct. We explicitly control
+ * the size of it here to avoid having the variable-width enum.
+ */
+typedef uint16_t pm_node_type_t;
+
+/**
+ * These are the flags embedded in the node struct. We explicitly control the
+ * size of it here to avoid having the variable-width enum.
+ */
+typedef uint16_t pm_node_flags_t;
+
+/**
+ * We store the flags enum in every node in the tree. Some flags are common to
+ * all nodes (the ones listed below). Others are specific to certain node types.
+ */
+static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
+static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
+
+/**
+ * This is the base structure that represents a node in the syntax tree. It is
+ * embedded into every node type.
+ */
+typedef struct pm_node {
+    /**
+     * This represents the type of the node. It somewhat maps to the nodes that
+     * existed in the original grammar and ripper, but it's not a 1:1 mapping.
+     */
+    pm_node_type_t type;
+
+    /**
+     * This represents any flags on the node. Some are common to all nodes, and
+     * some are specific to the type of node.
+     */
+    pm_node_flags_t flags;
+
+    /**
+     * The unique identifier for this node, which is deterministic based on the
+     * source. It is used to identify unique nodes across parses.
+     */
+    uint32_t node_id;
+
+    /**
+     * This is the location of the node in the source. It's a range of bytes
+     * containing a start and an end.
+     */
+    pm_location_t location;
+} pm_node_t;
+
+/**
+ * Cast the given node to the base pm_node_t type.
+ */
+#define PM_NODE_UPCAST(node_) ((pm_node_t *) (node_))
+
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node_) ((enum pm_node_type) (node_)->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
+#define PM_NODE_TYPE_P(node_, type_) (PM_NODE_TYPE(node_) == (type_))
+
+/**
+ * Return the flags associated with the given node.
+ */
+#define PM_NODE_FLAGS(node_) (PM_NODE_UPCAST(node_)->flags)
+
+/**
+ * Return true if the given flag is set on the given node.
+ */
+#define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0)
+<%- nodes.each do |node| -%>
+
+/**
+ * <%= node.name %>
+ *
+<%- node.each_comment_line do |line| -%>
+ *<%= line %>
+<%- end -%>
+ *
+ * Type: ::<%= node.type %>
+<% if (node_flags = node.flags) %>
+ * Flags (#pm_<%= node_flags.human %>):
+<%- node_flags.values.each do |value| -%>
+ * * ::PM_<%= node_flags.human.upcase %>_<%= value.name %>
+<%- end -%>
+<%- end -%>
+ *
+ * @extends pm_node_t
+ */
+typedef struct pm_<%= node.human %> {
+    /** The embedded base node. */
+    pm_node_t base;
+
+<%- node.fields.each do |field| -%>
+
+    /**
+     * <%= node.name %>#<%= field.name %>
+    <%- if field.comment -%>
+     *
+    <%- field.each_comment_line do |line| -%>
+     *<%= line %>
+    <%- end -%>
+    <%- end -%>
+     */
+    <%= case field
+    when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
+    when Prism::Template::NodeListField then "struct pm_node_list #{field.name}"
+    when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}"
+    when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}"
+    when Prism::Template::StringField then "pm_string_t #{field.name}"
+    when Prism::Template::LocationField, Prism::Template::OptionalLocationField then "pm_location_t #{field.name}"
+    when Prism::Template::UInt8Field then "uint8_t #{field.name}"
+    when Prism::Template::UInt32Field then "uint32_t #{field.name}"
+    when Prism::Template::IntegerField then "pm_integer_t #{field.name}"
+    when Prism::Template::DoubleField then "double #{field.name}"
+    else raise field.class.name
+    end
+    %>;
+<%- end -%>
+} pm_<%= node.human %>_t;
+<%- end -%>
+<%- flags.each do |flag| -%>
+
+/**
+ * <%= flag.comment %>
+ */
+typedef enum pm_<%= flag.human %> {
+    <%- flag.values.each_with_index do |value, index| -%>
+<%= "\n" if index > 0 -%>
+    /** <%= value.comment %> */
+    PM_<%= flag.human.upcase %>_<%= value.name %> = <%= 1 << (index + Prism::Template::COMMON_FLAGS_COUNT) %>,
+    <%- end -%>
+
+    PM_<%= flag.human.upcase %>_LAST,
+} pm_<%= flag.human %>_t;
+<%- end -%>
+
+/**
+ * When we're serializing to Java, we want to skip serializing the location
+ * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us
+ * to specify that through the environment. It will never be true except for in
+ * those build systems.
+ */
+#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0 %>
+
+#endif
diff --git a/prism/templates/include/prism/diagnostic.h.erb b/prism/templates/include/prism/diagnostic.h.erb
new file mode 100644
index 0000000000..07bbc8fae7
--- /dev/null
+++ b/prism/templates/include/prism/diagnostic.h.erb
@@ -0,0 +1,130 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
+#ifndef PRISM_DIAGNOSTIC_H
+#define PRISM_DIAGNOSTIC_H
+
+#include "prism/ast.h"
+#include "prism/defines.h"
+#include "prism/util/pm_list.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/**
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ */
+typedef enum {
+    // These are the error diagnostics.
+    <%- errors.each do |error| -%>
+    PM_ERR_<%= error.name %>,
+    <%- end -%>
+
+    // These are the warning diagnostics.
+    <%- warnings.each do |warning| -%>
+    PM_WARN_<%= warning.name %>,
+    <%- end -%>
+} pm_diagnostic_id_t;
+
+/**
+ * This struct represents a diagnostic generated during parsing.
+ *
+ * @extends pm_list_node_t
+ */
+typedef struct {
+    /** The embedded base node. */
+    pm_list_node_t node;
+
+    /** The location of the diagnostic in the source. */
+    pm_location_t location;
+
+    /** The ID of the diagnostic. */
+    pm_diagnostic_id_t diag_id;
+
+    /** The message associated with the diagnostic. */
+    const char *message;
+
+    /**
+     * Whether or not the memory related to the message of this diagnostic is
+     * owned by this diagnostic. If it is, it needs to be freed when the
+     * diagnostic is freed.
+     */
+    bool owned;
+
+    /**
+     * The level of the diagnostic, see `pm_error_level_t` and
+     * `pm_warning_level_t` for possible values.
+     */
+    uint8_t level;
+} pm_diagnostic_t;
+
+/**
+ * The levels of errors generated during parsing.
+ */
+typedef enum {
+    /** For errors that should raise a syntax error. */
+    PM_ERROR_LEVEL_SYNTAX = 0,
+
+    /** For errors that should raise an argument error. */
+    PM_ERROR_LEVEL_ARGUMENT = 1,
+
+    /** For errors that should raise a load error. */
+    PM_ERROR_LEVEL_LOAD = 2
+} pm_error_level_t;
+
+/**
+ * The levels of warnings generated during parsing.
+ */
+typedef enum {
+    /** For warnings which should be emitted if $VERBOSE != nil. */
+    PM_WARNING_LEVEL_DEFAULT = 0,
+
+    /** For warnings which should be emitted if $VERBOSE == true. */
+    PM_WARNING_LEVEL_VERBOSE = 1
+} pm_warning_level_t;
+
+/**
+ * Get the human-readable name of the given diagnostic ID.
+ *
+ * @param diag_id The diagnostic ID.
+ * @return The human-readable name of the diagnostic ID.
+ */
+const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
+
+/**
+ * Append a diagnostic to the given list of diagnostics that is using shared
+ * memory for its message.
+ *
+ * @param list The list to append to.
+ * @param start The start of the diagnostic.
+ * @param end The end of the diagnostic.
+ * @param diag_id The diagnostic ID.
+ * @return Whether the diagnostic was successfully appended.
+ */
+bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
+
+/**
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ *
+ * @param list The list to append to.
+ * @param start The start of the diagnostic.
+ * @param end The end of the diagnostic.
+ * @param diag_id The diagnostic ID.
+ * @param ... The arguments to the format string for the message.
+ * @return Whether the diagnostic was successfully appended.
+ */
+bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
+
+/**
+ * Deallocate the internal state of the given diagnostic list.
+ *
+ * @param list The list to deallocate.
+ */
+void pm_diagnostic_list_free(pm_list_t *list);
+
+#endif
diff --git a/prism/templates/lib/prism/compiler.rb.erb b/prism/templates/lib/prism/compiler.rb.erb
new file mode 100644
index 0000000000..66dbe666b9
--- /dev/null
+++ b/prism/templates/lib/prism/compiler.rb.erb
@@ -0,0 +1,43 @@
+module Prism
+  # A compiler is a visitor that returns the value of each node as it visits.
+  # This is as opposed to a visitor which will only walk the tree. This can be
+  # useful when you are trying to compile a tree into a different format.
+  #
+  # For example, to build a representation of the tree as s-expressions, you
+  # could write:
+  #
+  #     class SExpressions < Prism::Compiler
+  #       def visit_arguments_node(node) = [:arguments, super]
+  #       def visit_call_node(node) = [:call, super]
+  #       def visit_integer_node(node) = [:integer]
+  #       def visit_program_node(node) = [:program, super]
+  #     end
+  #
+  #     Prism.parse("1 + 2").value.accept(SExpressions.new)
+  #     # => [:program, [[[:call, [[:integer], [:arguments, [[:integer]]]]]]]]
+  #
+  class Compiler < Visitor
+    # Visit an individual node.
+    def visit(node)
+      node&.accept(self)
+    end
+
+    # Visit a list of nodes.
+    def visit_all(nodes)
+      nodes.map { |node| node&.accept(self) }
+    end
+
+    # Visit the child nodes of the given node.
+    def visit_child_nodes(node)
+      node.each_child_node.map { |node| node.accept(self) }
+    end
+
+    <%- nodes.each_with_index do |node, index| -%>
+<%= "\n" if index != 0 -%>
+    # Compile a <%= node.name %> node
+    def visit_<%= node.human %>(node)
+      node.each_child_node.map { |node| node.accept(self) }
+    end
+    <%- end -%>
+  end
+end
diff --git a/prism/templates/lib/prism/dispatcher.rb.erb b/prism/templates/lib/prism/dispatcher.rb.erb
new file mode 100644
index 0000000000..52478451c9
--- /dev/null
+++ b/prism/templates/lib/prism/dispatcher.rb.erb
@@ -0,0 +1,103 @@
+module Prism
+  # The dispatcher class fires events for nodes that are found while walking an
+  # AST to all registered listeners. It's useful for performing different types
+  # of analysis on the AST while only having to walk the tree once.
+  #
+  # To use the dispatcher, you would first instantiate it and register listeners
+  # for the events you're interested in:
+  #
+  #     class OctalListener
+  #       def on_integer_node_enter(node)
+  #         if node.octal? && !node.slice.start_with?("0o")
+  #           warn("Octal integers should be written with the 0o prefix")
+  #         end
+  #       end
+  #     end
+  #
+  #     listener = OctalListener.new
+  #     dispatcher = Prism::Dispatcher.new
+  #     dispatcher.register(listener, :on_integer_node_enter)
+  #
+  # Then, you can walk any number of trees and dispatch events to the listeners:
+  #
+  #     result = Prism.parse("001 + 002 + 003")
+  #     dispatcher.dispatch(result.value)
+  #
+  # Optionally, you can also use `#dispatch_once` to dispatch enter and leave
+  # events for a single node without recursing further down the tree. This can
+  # be useful in circumstances where you want to reuse the listeners you already
+  # have registered but want to stop walking the tree at a certain point.
+  #
+  #     integer = result.value.statements.body.first.receiver.receiver
+  #     dispatcher.dispatch_once(integer)
+  #
+  class Dispatcher < Visitor
+    # attr_reader listeners: Hash[Symbol, Array[Listener]]
+    attr_reader :listeners
+
+    # Initialize a new dispatcher.
+    def initialize
+      @listeners = {}
+    end
+
+    # Register a listener for one or more events.
+    #
+    # def register: (Listener, *Symbol) -> void
+    def register(listener, *events)
+      register_events(listener, events)
+    end
+
+    # Register all public methods of a listener that match the pattern
+    # `on_<node_name>_(enter|leave)`.
+    #
+    # def register_public_methods: (Listener) -> void
+    def register_public_methods(listener)
+      register_events(listener, listener.public_methods(false).grep(/\Aon_.+_(?:enter|leave)\z/))
+    end
+
+    # Register a listener for the given events.
+    private def register_events(listener, events)
+      events.each { |event| (listeners[event] ||= []) << listener }
+    end
+
+    # Walks `root` dispatching events to all registered listeners.
+    #
+    # def dispatch: (Node) -> void
+    alias dispatch visit
+
+    # Dispatches a single event for `node` to all registered listeners.
+    #
+    # def dispatch_once: (Node) -> void
+    def dispatch_once(node)
+      node.accept(DispatchOnce.new(listeners))
+    end
+    <%- nodes.each do |node| -%>
+
+    # Dispatch enter and leave events for <%= node.name %> nodes and continue
+    # walking the tree.
+    def visit_<%= node.human %>(node)
+      listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
+      super
+      listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
+    end
+    <%- end -%>
+
+    class DispatchOnce < Visitor # :nodoc:
+      attr_reader :listeners
+
+      def initialize(listeners)
+        @listeners = listeners
+      end
+      <%- nodes.each do |node| -%>
+
+      # Dispatch enter and leave events for <%= node.name %> nodes.
+      def visit_<%= node.human %>(node)
+        listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
+        listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
+      end
+      <%- end -%>
+    end
+
+    private_constant :DispatchOnce
+  end
+end
diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb
new file mode 100644
index 0000000000..cd2998fe61
--- /dev/null
+++ b/prism/templates/lib/prism/dot_visitor.rb.erb
@@ -0,0 +1,189 @@
+require "cgi/escape"
+require "cgi/util" unless defined?(CGI::EscapeExt)
+
+module Prism
+  # This visitor provides the ability to call Node#to_dot, which converts a
+  # subtree into a graphviz dot graph.
+  class DotVisitor < Visitor
+    class Field # :nodoc:
+      attr_reader :name, :value, :port
+
+      def initialize(name, value, port)
+        @name = name
+        @value = value
+        @port = port
+      end
+
+      def to_dot
+        if port
+          "<tr><td align=\"left\" colspan=\"2\" port=\"#{name}\">#{name}</td></tr>"
+        else
+          "<tr><td align=\"left\">#{name}</td><td>#{CGI.escapeHTML(value || raise)}</td></tr>"
+        end
+      end
+    end
+
+    class Table # :nodoc:
+      attr_reader :name, :fields
+
+      def initialize(name)
+        @name = name
+        @fields = []
+      end
+
+      def field(name, value = nil, port: false)
+        fields << Field.new(name, value, port)
+      end
+
+      def to_dot
+        dot = <<~DOT
+          <table border="0" cellborder="1" cellspacing="0" cellpadding="4">
+            <tr><td colspan="2"><b>#{name}</b></td></tr>
+        DOT
+
+        if fields.any?
+          "#{dot}  #{fields.map(&:to_dot).join("\n  ")}\n</table>"
+        else
+          "#{dot}</table>"
+        end
+      end
+    end
+
+    class Digraph # :nodoc:
+      attr_reader :nodes, :waypoints, :edges
+
+      def initialize
+        @nodes = []
+        @waypoints = []
+        @edges = []
+      end
+
+      def node(value)
+        nodes << value
+      end
+
+      def waypoint(value)
+        waypoints << value
+      end
+
+      def edge(value)
+        edges << value
+      end
+
+      def to_dot
+        <<~DOT
+          digraph "Prism" {
+            node [
+              fontname=\"Courier New\"
+              shape=plain
+              style=filled
+              fillcolor=gray95
+            ];
+
+            #{nodes.map { |node| node.gsub(/\n/, "\n  ") }.join("\n  ")}
+            node [shape=point];
+            #{waypoints.join("\n  ")}
+
+            #{edges.join("\n  ")}
+          }
+        DOT
+      end
+    end
+
+    private_constant :Field, :Table, :Digraph
+
+    # The digraph that is being built.
+    attr_reader :digraph
+
+    # Initialize a new dot visitor.
+    def initialize
+      @digraph = Digraph.new
+    end
+
+    # Convert this visitor into a graphviz dot graph string.
+    def to_dot
+      digraph.to_dot
+    end
+    <%- nodes.each do |node| -%>
+
+    # Visit a <%= node.name %> node, adding its table and edges to the digraph.
+    def visit_<%= node.human %>(node)
+      table = Table.new("<%= node.name %>")
+      id = node_id(node)
+      <%- if (node_flags = node.flags) -%>
+
+      # flags
+      table.field("flags", <%= node_flags.human %>_inspect(node))
+      <%- end -%>
+      <%- node.fields.each do |field| -%>
+
+      # <%= field.name %>
+      <%- case field -%>
+      <%- when Prism::Template::NodeField -%>
+      table.field("<%= field.name %>", port: true)
+      digraph.edge("#{id}:<%= field.name %> -> #{node_id(node.<%= field.name %>)};")
+      <%- when Prism::Template::OptionalNodeField -%>
+      unless (<%= field.name %> = node.<%= field.name %>).nil?
+        table.field("<%= field.name %>", port: true)
+        digraph.edge("#{id}:<%= field.name %> -> #{node_id(<%= field.name %>)};")
+      end
+      <%- when Prism::Template::NodeListField -%>
+      if node.<%= field.name %>.any?
+        table.field("<%= field.name %>", port: true)
+
+        waypoint = "#{id}_<%= field.name %>"
+        digraph.waypoint("#{waypoint};")
+
+        digraph.edge("#{id}:<%= field.name %> -> #{waypoint};")
+        node.<%= field.name %>.each { |child| digraph.edge("#{waypoint} -> #{node_id(child)};") }
+      else
+        table.field("<%= field.name %>", "[]")
+      end
+      <%- when Prism::Template::StringField, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantListField, Prism::Template::IntegerField, Prism::Template::DoubleField -%>
+      table.field("<%= field.name %>", node.<%= field.name %>.inspect)
+      <%- when Prism::Template::LocationField -%>
+      table.field("<%= field.name %>", location_inspect(node.<%= field.name %>))
+      <%- when Prism::Template::OptionalLocationField -%>
+      unless (<%= field.name %> = node.<%= field.name %>).nil?
+        table.field("<%= field.name %>", location_inspect(<%= field.name %>))
+      end
+      <%- else -%>
+      <%- raise -%>
+      <%- end -%>
+      <%- end -%>
+
+      digraph.nodes << <<~DOT
+        #{id} [
+          label=<#{table.to_dot.gsub(/\n/, "\n  ")}>
+        ];
+      DOT
+
+      super
+    end
+    <%- end -%>
+
+    private
+
+    # Generate a unique node ID for a node throughout the digraph.
+    def node_id(node)
+      "Node_#{node.object_id}"
+    end
+
+    # Inspect a location to display the start and end line and column numbers.
+    def location_inspect(location)
+      "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})"
+    end
+    <%- flags.each do |flag| -%>
+
+    # Inspect a node that has <%= flag.human %> flags to display the flags as a
+    # comma-separated list.
+    def <%= flag.human %>_inspect(node)
+      flags = [] #: Array[String]
+      <%- flag.values.each do |value| -%>
+      flags << "<%= value.name.downcase %>" if node.<%= value.name.downcase %>?
+      <%- end -%>
+      flags.join(", ")
+    end
+    <%- end -%>
+  end
+end
diff --git a/prism/templates/lib/prism/dsl.rb.erb b/prism/templates/lib/prism/dsl.rb.erb
new file mode 100644
index 0000000000..e16ebb7110
--- /dev/null
+++ b/prism/templates/lib/prism/dsl.rb.erb
@@ -0,0 +1,133 @@
+module Prism
+  # The DSL module provides a set of methods that can be used to create prism
+  # nodes in a more concise manner. For example, instead of writing:
+  #
+  #     source = Prism::Source.for("[1]")
+  #
+  #     Prism::ArrayNode.new(
+  #       source,
+  #       0,
+  #       Prism::Location.new(source, 0, 3),
+  #       0,
+  #       [
+  #         Prism::IntegerNode.new(
+  #           source,
+  #           0,
+  #           Prism::Location.new(source, 1, 1),
+  #           Prism::IntegerBaseFlags::DECIMAL,
+  #           1
+  #         )
+  #       ],
+  #       Prism::Location.new(source, 0, 1),
+  #       Prism::Location.new(source, 2, 1)
+  #     )
+  #
+  # you could instead write:
+  #
+  #     class Builder
+  #       include Prism::DSL
+  #
+  #       attr_reader :default_source
+  #
+  #       def initialize
+  #         @default_source = source("[1]")
+  #       end
+  #
+  #       def build
+  #         array_node(
+  #           location: location(start_offset: 0, length: 3),
+  #           elements: [
+  #             integer_node(
+  #               location: location(start_offset: 1, length: 1),
+  #               flags: integer_base_flag(:decimal),
+  #               value: 1
+  #             )
+  #           ],
+  #           opening_loc: location(start_offset: 0, length: 1),
+  #           closing_loc: location(start_offset: 2, length: 1)
+  #         )
+  #       end
+  #     end
+  #
+  # This is mostly helpful in the context of generating trees programmatically.
+  module DSL
+    # Provide all of these methods as module methods as well, to allow for
+    # building nodes like Prism::DSL.nil_node.
+    extend self
+
+    # Create a new Source object.
+    def source(string)
+      Source.for(string)
+    end
+
+    # Create a new Location object, by default empty at the start of default_source.
+    def location(source: default_source, start_offset: 0, length: 0)
+      Location.new(source, start_offset, length)
+    end
+    <%- nodes.each do |node| -%>
+
+    # Create a new <%= node.name %> node; every argument is an optional keyword.
+    def <%= node.human %>(<%= ["source: default_source", "node_id: 0", "location: default_location", "flags: 0", *node.fields.map { |field|
+      case field
+      when Prism::Template::NodeField
+        kind = field.specific_kind || field.union_kind&.first
+        if kind.nil?
+          "#{field.name}: default_node(source, location)"
+        else
+          "#{field.name}: #{kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}(source: source)"
+        end
+      when Prism::Template::ConstantField
+        "#{field.name}: :\"\""
+      when Prism::Template::OptionalNodeField, Prism::Template::OptionalConstantField, Prism::Template::OptionalLocationField
+        "#{field.name}: nil"
+      when Prism::Template::NodeListField, Prism::Template::ConstantListField
+        "#{field.name}: []"
+      when Prism::Template::StringField
+        "#{field.name}: \"\""
+      when Prism::Template::LocationField
+        "#{field.name}: location"
+      when Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField
+        "#{field.name}: 0"
+      when Prism::Template::DoubleField
+        "#{field.name}: 0.0"
+      else
+        raise
+      end
+    }].join(", ") %>)
+      <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+    end
+    <%- end -%>
+    <%- flags.each do |flag| -%>
+
+    # Look up one of the <%= flag.name %> flags by symbol; unknown names raise ArgumentError.
+    def <%= flag.human.chomp("s") %>(name)
+      case name
+      <%- flag.values.each do |value| -%>
+      when :<%= value.name.downcase %> then <%= flag.name %>::<%= value.name %>
+      <%- end -%>
+      else Kernel.raise ArgumentError, "invalid <%= flag.name %> flag: #{name.inspect}"
+      end
+    end
+    <%- end -%>
+
+    private
+
+    # The default source object that gets attached to nodes and locations if no
+    # source is specified.
+    def default_source
+      Source.for("")
+    end
+
+    # The default location object that gets attached to nodes if no location is
+    # specified, which is empty and points at the start of the default source.
+    def default_location
+      Location.new(default_source, 0, 0)
+    end
+
+    # The default node that gets attached to nodes if no node is specified for a
+    # required node field: a MissingNode with node_id -1 and flags 0.
+    def default_node(source, location)
+      MissingNode.new(source, -1, location, 0)
+    end
+  end
+end
diff --git a/prism/templates/lib/prism/inspect_visitor.rb.erb b/prism/templates/lib/prism/inspect_visitor.rb.erb
new file mode 100644
index 0000000000..3cfe615d85
--- /dev/null
+++ b/prism/templates/lib/prism/inspect_visitor.rb.erb
@@ -0,0 +1,131 @@
+module Prism
+  # This visitor is responsible for composing the strings that get returned by
+  # the various #inspect methods defined on each of the nodes.
+  class InspectVisitor < Visitor
+    # Most of the time, we can simply pass down the indent to the next node.
+    # However, when we are inside a list we want some extra special formatting
+    # when we hit an element in that list. In this case, we have a special
+    # command that replaces the subsequent indent with the given value.
+    class Replace # :nodoc:
+      attr_reader :value
+
+      def initialize(value)
+        @value = value
+      end
+    end
+
+    private_constant :Replace
+
+    # The current prefix string.
+    attr_reader :indent
+
+    # The list of commands that we need to execute in order to compose the
+    # final string.
+    attr_reader :commands
+
+    # Initializes a new instance of the InspectVisitor.
+    def initialize(indent = +"")
+      @indent = indent
+      @commands = []
+    end
+
+    # Compose an inspect string for the given node.
+    def self.compose(node)
+      visitor = new
+      node.accept(visitor)
+      visitor.compose
+    end
+
+    # Compose the final string.
+    def compose
+      buffer = +""
+      replace = nil
+
+      until commands.empty?
+        # @type var command: String | node | Replace
+        # @type var indent: String
+        command, indent = *commands.shift
+
+        case command
+        when String
+          buffer << (replace || indent)
+          buffer << command
+          replace = nil
+        when Node
+          visitor = InspectVisitor.new(indent)
+          command.accept(visitor)
+          @commands = [*visitor.commands, *@commands]
+        when Replace
+          replace = command.value
+        else
+          raise "Unknown command: #{command.inspect}"
+        end
+      end
+
+      buffer
+    end
+    <%- nodes.each do |node| -%>
+
+    # Queue the commands that render a <%= node.name %> node and its fields.
+    def visit_<%= node.human %>(node)
+      commands << [inspect_node(<%= node.name.inspect %>, node), indent]
+      <%- (fields = [node.flags || Prism::Template::Flags.empty, *node.fields]).each_with_index do |field, index| -%>
+      <%- pointer = index == fields.length - 1 ? "└── " : "├── " -%>
+      <%- preadd = index == fields.length - 1 ? "    " : "│   " -%>
+      <%- case field -%>
+      <%- when Prism::Template::Flags -%>
+      flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), <%= field.values.map { |value| "(\"#{value.name.downcase}\" if node.#{value.name.downcase}?)" }.join(", ") %>].compact
+      commands << ["<%= pointer %>flags: #{flags.empty? ? "∅" : flags.join(", ")}\n", indent]
+      <%- when Prism::Template::NodeListField -%>
+      commands << ["<%= pointer %><%= field.name %>: (length: #{(<%= field.name %> = node.<%= field.name %>).length})\n", indent]
+      if <%= field.name %>.any?
+        <%= field.name %>[0...-1].each do |child|
+          commands << [Replace.new("#{indent}<%= preadd %>├── "), indent]
+          commands << [child, "#{indent}<%= preadd %>│   "]
+        end
+        commands << [Replace.new("#{indent}<%= preadd %>└── "), indent]
+        commands << [<%= field.name %>[-1], "#{indent}<%= preadd %>    "]
+      end
+      <%- when Prism::Template::NodeField -%>
+      commands << ["<%= pointer %><%= field.name %>:\n", indent]
+      commands << [node.<%= field.name %>, "#{indent}<%= preadd %>"]
+      <%- when Prism::Template::OptionalNodeField -%>
+      if (<%= field.name %> = node.<%= field.name %>).nil?
+        commands << ["<%= pointer %><%= field.name %>: ∅\n", indent]
+      else
+        commands << ["<%= pointer %><%= field.name %>:\n", indent]
+        commands << [<%= field.name %>, "#{indent}<%= preadd %>"]
+      end
+      <%- when Prism::Template::ConstantField, Prism::Template::ConstantListField, Prism::Template::StringField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField, Prism::Template::DoubleField -%>
+      commands << ["<%= pointer %><%= field.name %>: #{node.<%= field.name %>.inspect}\n", indent]
+      <%- when Prism::Template::OptionalConstantField -%>
+      if (<%= field.name %> = node.<%= field.name %>).nil?
+        commands << ["<%= pointer %><%= field.name %>: ∅\n", indent]
+      else
+        commands << ["<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n", indent]
+      end
+      <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField -%>
+      commands << ["<%= pointer %><%= field.name %>: #{inspect_location(node.<%= field.name %>)}\n", indent]
+      <%- end -%>
+      <%- end -%>
+    end
+    <%- end -%>
+
+    private
+
+    # Compose a header for the given node.
+    def inspect_node(name, node)
+      location = node.location
+      "@ #{name} (location: (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}))\n"
+    end
+
+    # Compose a string representing the given inner location field.
+    def inspect_location(location)
+      if location
+        "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}) = #{location.slice.inspect}"
+      else
+        "∅"
+      end
+    end
+  end
+end
diff --git a/prism/templates/lib/prism/mutation_compiler.rb.erb b/prism/templates/lib/prism/mutation_compiler.rb.erb
new file mode 100644
index 0000000000..565ee4e315
--- /dev/null
+++ b/prism/templates/lib/prism/mutation_compiler.rb.erb
@@ -0,0 +1,19 @@
+module Prism
+  # This visitor walks through the tree and copies each node as it is being
+  # visited. This is useful for consumers that want to mutate the tree, as you
+  # can change subtrees in place without affecting the rest of the tree.
+  class MutationCompiler < Compiler
+    <%- nodes.each_with_index do |node, index| -%>
+<%= "\n" if index != 0 -%>
+    # Copy a <%= node.name %> node, visiting any child nodes it has.
+    def visit_<%= node.human %>(node)
+      <%- fields = node.fields.select { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
+      <%- if fields.any? -%>
+      node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::Template::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
+      <%- else -%>
+      node.copy
+      <%- end -%>
+    end
+    <%- end -%>
+  end
+end
diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb
new file mode 100644
index 0000000000..8225bfb328
--- /dev/null
+++ b/prism/templates/lib/prism/node.rb.erb
@@ -0,0 +1,527 @@
+module Prism
+  # This represents a node in the tree. It is the parent class of all of the
+  # various node types.
+  class Node
+    # A pointer to the source that this node was created from.
+    attr_reader :source
+    private :source
+
+    # A unique identifier for this node. This is used in a very specific
+    # use case where you want to keep around a reference to a node without
+    # having to keep around the syntax tree in memory. This unique identifier
+    # will be consistent across multiple parses of the same source code.
+    attr_reader :node_id
+
+    # Save this node using a saved source so that it can be retrieved later.
+    def save(repository)
+      repository.enter(node_id, :itself)
+    end
+
+    # A Location instance that represents the location of this node in the
+    # source.
+    def location
+      location = @location
+      return location if location.is_a?(Location)
+      @location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+    end
+
+    # Save the location using a saved source so that it can be retrieved later.
+    def save_location(repository)
+      repository.enter(node_id, :location)
+    end
+
+    # Delegates to the start_line of the associated location object.
+    def start_line
+      location.start_line
+    end
+
+    # Delegates to the end_line of the associated location object.
+    def end_line
+      location.end_line
+    end
+
+    # The start offset of the node in the source. This method is effectively a
+    # delegate method to the location object.
+    def start_offset
+      location = @location
+      location.is_a?(Location) ? location.start_offset : location >> 32
+    end
+
+    # The end offset of the node in the source. This method is effectively a
+    # delegate method to the location object.
+    def end_offset
+      location = @location
+      location.is_a?(Location) ? location.end_offset : ((location >> 32) + (location & 0xFFFFFFFF))
+    end
+
+    # Delegates to the start_character_offset of the associated location object.
+    def start_character_offset
+      location.start_character_offset
+    end
+
+    # Delegates to the end_character_offset of the associated location object.
+    def end_character_offset
+      location.end_character_offset
+    end
+
+    # Delegates to the cached_start_code_units_offset of the associated location
+    # object.
+    def cached_start_code_units_offset(cache)
+      location.cached_start_code_units_offset(cache)
+    end
+
+    # Delegates to the cached_end_code_units_offset of the associated location
+    # object.
+    def cached_end_code_units_offset(cache)
+      location.cached_end_code_units_offset(cache)
+    end
+
+    # Delegates to the start_column of the associated location object.
+    def start_column
+      location.start_column
+    end
+
+    # Delegates to the end_column of the associated location object.
+    def end_column
+      location.end_column
+    end
+
+    # Delegates to the start_character_column of the associated location object.
+    def start_character_column
+      location.start_character_column
+    end
+
+    # Delegates to the end_character_column of the associated location object.
+    def end_character_column
+      location.end_character_column
+    end
+
+    # Delegates to the cached_start_code_units_column of the associated location
+    # object.
+    def cached_start_code_units_column(cache)
+      location.cached_start_code_units_column(cache)
+    end
+
+    # Delegates to the cached_end_code_units_column of the associated location
+    # object.
+    def cached_end_code_units_column(cache)
+      location.cached_end_code_units_column(cache)
+    end
+
+    # Delegates to the leading_comments of the associated location object.
+    def leading_comments
+      location.leading_comments
+    end
+
+    # Delegates to the trailing_comments of the associated location object.
+    def trailing_comments
+      location.trailing_comments
+    end
+
+    # Delegates to the comments of the associated location object.
+    def comments
+      location.comments
+    end
+
+    # Returns all of the lines of the source code associated with this node.
+    def source_lines
+      location.source_lines
+    end
+
+    # An alias for source_lines, used to mimic the API from
+    # RubyVM::AbstractSyntaxTree to make it easier to migrate.
+    alias script_lines source_lines
+
+    # Slice the location of the node from the source.
+    def slice
+      location.slice
+    end
+
+    # Slice the location of the node from the source, starting at the beginning
+    # of the line that the location starts on, ending at the end of the line
+    # that the location ends on.
+    def slice_lines
+      location.slice_lines
+    end
+
+    # A bitset of flags for this node. There are certain flags that are common
+    # for all nodes, and then some nodes have specific flags.
+    attr_reader :flags
+    protected :flags
+
+    # Returns true if the node has the newline flag set.
+    def newline?
+      flags.anybits?(NodeFlags::NEWLINE)
+    end
+
+    # Returns true if the node has the static literal flag set.
+    def static_literal?
+      flags.anybits?(NodeFlags::STATIC_LITERAL)
+    end
+
+    # Similar to inspect, but respects the current level of indentation given by
+    # the pretty print object.
+    def pretty_print(q)
+      q.seplist(inspect.chomp.each_line, -> { q.breakable }) do |line|
+        q.text(line.chomp)
+      end
+      q.current_group.break
+    end
+
+    # Convert this node into a graphviz dot graph string.
+    def to_dot
+      # @type self: node
+      DotVisitor.new.tap { |visitor| accept(visitor) }.to_dot
+    end
+
+    # Returns a list of nodes that are descendants of this node that contain the
+    # given line and column. This is useful for locating a node that is selected
+    # based on the line and column of the source code.
+    #
+    # Important to note is that the column given to this method should be in
+    # bytes, as opposed to characters or code units.
+    def tunnel(line, column)
+      queue = [self] #: Array[Prism::node]
+      result = [] #: Array[Prism::node]
+
+      search_offset = source.line_to_byte_offset(line) + column
+
+      while (node = queue.shift)
+        result << node
+
+        node.each_child_node do |child_node|
+          if child_node.start_offset <= search_offset && search_offset < child_node.end_offset
+            queue << child_node
+            break
+          end
+        end
+      end
+
+      result
+    end
+
+    # Returns the first node that matches the given block when visited in a
+    # breadth-first search, or nil if no node matches. This is useful for
+    # finding a node that matches a particular condition.
+    #
+    #     node.breadth_first_search { |node| node.node_id == node_id }
+    #
+    def breadth_first_search(&block)
+      queue = [self] #: Array[Prism::node]
+
+      while (node = queue.shift)
+        return node if yield node
+        queue.concat(node.compact_child_nodes)
+      end
+
+      nil
+    end
+
+    # Returns a list of the fields that exist for this node class. Fields
+    # describe the structure of the node. This kind of reflection is useful for
+    # things like recursively visiting each node _and_ field in the tree.
+    def self.fields
+      # This method should only be called on subclasses of Node, not Node
+      # itself.
+      raise NoMethodError, "undefined method `fields' for #{inspect}" if self == Node
+
+      Reflection.fields_for(self)
+    end
+
+    # --------------------------------------------------------------------------
+    # :section: Node interface
+    # These methods are effectively abstract methods that must be implemented by
+    # the various subclasses of Node. They are here to make it easier to work
+    # with typecheckers.
+    # --------------------------------------------------------------------------
+
+    # Accepts a visitor and calls back into the specialized visit function.
+    def accept(visitor)
+      raise NoMethodError, "undefined method `accept' for #{inspect}"
+    end
+
+    # Returns an array of child nodes, including `nil`s in the place of optional
+    # nodes that were not present.
+    def child_nodes
+      raise NoMethodError, "undefined method `child_nodes' for #{inspect}"
+    end
+
+    alias deconstruct child_nodes
+
+    # With a block given, yields each child node. Without a block, returns
+    # an enumerator that contains each child node. Excludes any `nil`s in
+    # the place of optional nodes that were not present.
+    def each_child_node
+      raise NoMethodError, "undefined method `each_child_node' for #{inspect}"
+    end
+
+    # Returns an array of child nodes, excluding any `nil`s in the place of
+    # optional nodes that were not present.
+    def compact_child_nodes
+      raise NoMethodError, "undefined method `compact_child_nodes' for #{inspect}"
+    end
+
+    # Returns an array of child nodes and locations that could potentially have
+    # comments attached to them.
+    def comment_targets
+      raise NoMethodError, "undefined method `comment_targets' for #{inspect}"
+    end
+
+    # Returns a string representation of the node.
+    def inspect
+      raise NoMethodError, "undefined method `inspect' for #{inspect}"
+    end
+
+    # Sometimes you want to check an instance of a node against a list of
+    # classes to see what kind of behavior to perform. Usually this is done by
+    # calling `[cls1, cls2].include?(node.class)` or putting the node into a
+    # case statement and doing `case node; when cls1; when cls2; end`. Both of
+    # these approaches are relatively slow because of the constant lookups,
+    # method calls, and/or array allocations.
+    #
+    # Instead, you can call #type, which will return to you a symbol that you
+    # can use for comparison. This is faster than the other approaches because
+    # it uses a single integer comparison, but also because if you're on CRuby
+    # you can take advantage of the fact that case statements with all symbol
+    # keys will use a jump table.
+    def type
+      raise NoMethodError, "undefined method `type' for #{inspect}"
+    end
+
+    # Similar to #type, this method returns a symbol that you can use for
+    # splitting on the type of the node without having to do a long === chain.
+    # Note that like #type, it will still be slower than using == for a single
+    # class, but should be faster in a case statement or an array comparison.
+    def self.type
+      raise NoMethodError, "undefined method `type' for #{inspect}"
+    end
+  end
+  <%- nodes.each do |node| -%>
+
+  <%- node.each_comment_line do |line| -%>
+  #<%= line %>
+  <%- end -%>
+  class <%= node.name -%> < Node
+    # Initialize a new <%= node.name %> node.
+    def initialize(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+      @source = source
+      @node_id = node_id
+      @location = location
+      @flags = flags
+      <%- node.fields.each do |field| -%>
+      <%- if Prism::Template::CHECK_FIELD_KIND && field.respond_to?(:check_field_kind) -%>
+      raise "<%= node.name %>#<%= field.name %> was of unexpected type:\n#{<%= field.name %>.inspect}" unless <%= field.check_field_kind %>
+      <%- end -%>
+      @<%= field.name %> = <%= field.name %>
+      <%- end -%>
+    end
+
+    # def accept: (Visitor visitor) -> void
+    def accept(visitor)
+      visitor.visit_<%= node.human %>(self)
+    end
+
+    # def child_nodes: () -> Array[Node?]
+    def child_nodes
+      [<%= node.fields.map { |field|
+        case field
+        when Prism::Template::NodeField, Prism::Template::OptionalNodeField then field.name
+        when Prism::Template::NodeListField then "*#{field.name}"
+        end
+      }.compact.join(", ") %>]
+    end
+
+    # def each_child_node: () { (Prism::node) -> void } -> void | () -> Enumerator[Prism::node]
+    def each_child_node
+      return to_enum(:each_child_node) unless block_given?
+
+      <%- node.fields.each do |field| -%>
+      <%- case field -%>
+      <%- when Prism::Template::NodeField -%>
+      yield <%= field.name %>
+      <%- when Prism::Template::OptionalNodeField -%>
+      yield <%= field.name %> if <%= field.name %>
+      <%- when Prism::Template::NodeListField -%>
+      <%= field.name %>.each { |node| yield node }
+      <%- end -%>
+      <%- end -%>
+    end
+
+    # def compact_child_nodes: () -> Array[Node]
+    def compact_child_nodes
+      <%- if node.fields.any? { |field| field.is_a?(Prism::Template::OptionalNodeField) } -%>
+      compact = [] #: Array[Prism::node]
+      <%- node.fields.each do |field| -%>
+      <%- case field -%>
+      <%- when Prism::Template::NodeField -%>
+      compact << <%= field.name %>
+      <%- when Prism::Template::OptionalNodeField -%>
+      compact << <%= field.name %> if <%= field.name %>
+      <%- when Prism::Template::NodeListField -%>
+      compact.concat(<%= field.name %>)
+      <%- end -%>
+      <%- end -%>
+      compact
+      <%- else -%>
+      [<%= node.fields.map { |field|
+        case field
+        when Prism::Template::NodeField then field.name
+        when Prism::Template::NodeListField then "*#{field.name}"
+        end
+      }.compact.join(", ") %>]
+      <%- end -%>
+    end
+
+    # def comment_targets: () -> Array[Node | Location]
+    def comment_targets
+      [<%= node.fields.map { |field|
+        case field
+        when Prism::Template::NodeField, Prism::Template::LocationField then field.name
+        when Prism::Template::OptionalNodeField, Prism::Template::NodeListField, Prism::Template::OptionalLocationField then "*#{field.name}"
+        end
+      }.compact.join(", ") %>] #: Array[Prism::node | Location]
+    end
+
+    # def copy: (<%= (["?node_id: Integer", "?location: Location", "?flags: Integer"] + node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }).join(", ") %>) -> <%= node.name %>
+    def copy(<%= (["node_id", "location", "flags"] + node.fields.map(&:name)).map { |field| "#{field}: self.#{field}" }.join(", ") %>)
+      <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+    end
+
+    # def deconstruct: () -> Array[Node?]
+    alias deconstruct child_nodes
+
+    # def deconstruct_keys: (Array[Symbol] keys) -> { <%= (["node_id: Integer", "location: Location"] + node.fields.map { |field| "#{field.name}: #{field.rbs_class}" }).join(", ") %> }
+    def deconstruct_keys(keys)
+      { <%= (["node_id: node_id", "location: location"] + node.fields.map { |field| "#{field.name}: #{field.name}" }).join(", ") %> }
+    end
+    <%- if (node_flags = node.flags) -%>
+    <%- node_flags.values.each do |value| -%>
+
+    # Returns true if the <%= value.name %> bit is set in this node's flags.
+    #
+    # def <%= value.name.downcase %>?: () -> bool
+    def <%= value.name.downcase %>?
+      flags.anybits?(<%= node_flags.name %>::<%= value.name %>)
+    end
+    <%- end -%>
+    <%- end -%>
+    <%- node.fields.each do |field| -%>
+
+    <%- if field.comment.nil? -%>
+    # attr_reader <%= field.name %>: <%= field.rbs_class %>
+    <%- else -%>
+    <%- field.each_comment_line do |line| -%>
+    #<%= line %>
+    <%- end -%>
+    <%- end -%>
+    <%- case field -%>
+    <%- when Prism::Template::LocationField -%>
+    def <%= field.name %>
+      location = @<%= field.name %>
+      # The ivar holds either a Location or a packed Integer whose high and low
+      # 32 bits are the two integers passed to Location.new. Unpack on first
+      # access and memoize the resulting object.
+      return location if location.is_a?(Location)
+      @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+    end
+
+    # Registers this node's id and the :<%= field.name %> field with the given
+    # repository so that the location can be retrieved later.
+    def save_<%= field.name %>(repository)
+      repository.enter(node_id, :<%= field.name %>)
+    end
+    <%- when Prism::Template::OptionalLocationField -%>
+    def <%= field.name %>
+      location = @<%= field.name %>
+      # The ivar is nil when the location is absent, an already-built Location,
+      # or a packed Integer (high and low 32 bits are the two integers passed
+      # to Location.new) that is unpacked and memoized on first access.
+      case location
+      when nil
+        nil
+      when Location
+        location
+      else
+        @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+      end
+    end
+
+    # Registers this node's id and the :<%= field.name %> field with the given
+    # repository so that the location can be retrieved later. Does nothing
+    # when the location is absent.
+    def save_<%= field.name %>(repository)
+      repository.enter(node_id, :<%= field.name %>) unless @<%= field.name %>.nil?
+    end
+    <%- else -%>
+    attr_reader :<%= field.name %>
+    <%- end -%>
+    <%- end -%>
+    <%- node.fields.each do |field| -%>
+    <%- case field -%>
+    <%- when Prism::Template::LocationField -%>
+    <%- raise unless field.name.end_with?("_loc") -%>
+    <%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
+
+    # Returns the result of calling slice on <%= field.name %>.
+    #
+    # def <%= field.name.delete_suffix("_loc") %>: () -> String
+    def <%= field.name.delete_suffix("_loc") %>
+      <%= field.name %>.slice
+    end
+    <%- when Prism::Template::OptionalLocationField -%>
+    <%- raise unless field.name.end_with?("_loc") -%>
+    <%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
+
+    # Returns the result of calling slice on <%= field.name %>, or nil when
+    # that location is absent.
+    #
+    # def <%= field.name.delete_suffix("_loc") %>: () -> String?
+    def <%= field.name.delete_suffix("_loc") %>
+      <%= field.name %>&.slice
+    end
+    <%- end -%>
+    <%- end -%>
+
+    # Returns the string produced by InspectVisitor.compose for this node.
+    #
+    # def inspect -> String
+    def inspect
+      InspectVisitor.compose(self)
+    end
+
+    # Return a symbol representation of this node type. See `Node#type`.
+    def type
+      :<%= node.human %>
+    end
+
+    # Return a symbol representation of this node type. See `Node::type`.
+    def self.type
+      :<%= node.human %>
+    end
+
+    # Implements case-equality for the node. This is effectively == but without
+    # comparing the value of locations. Locations are checked only for presence.
+    #
+    # The other object must be a <%= node.name %>. Node and constant lists must
+    # have the same length and match element by element with ===; flags and
+    # all remaining fields are compared with ===.
+    def ===(other)
+      other.is_a?(<%= node.name %>)<%= " &&" if (fields = [*node.flags, *node.fields]).any? %>
+        <%- fields.each_with_index do |field, index| -%>
+        <%- if field.is_a?(Prism::Template::LocationField) || field.is_a?(Prism::Template::OptionalLocationField) -%>
+        (<%= field.name %>.nil? == other.<%= field.name %>.nil?)<%= " &&" if index != fields.length - 1 %>
+        <%- elsif field.is_a?(Prism::Template::NodeListField) || field.is_a?(Prism::Template::ConstantListField) -%>
+        (<%= field.name %>.length == other.<%= field.name %>.length) &&
+        <%= field.name %>.zip(other.<%= field.name %>).all? { |left, right| left === right }<%= " &&" if index != fields.length - 1 %>
+        <%- elsif field.is_a?(Prism::Template::Flags) -%>
+        (flags === other.flags)<%= " &&" if index != fields.length - 1 %>
+        <%- else -%>
+        (<%= field.name %> === other.<%= field.name %>)<%= " &&" if index != fields.length - 1 %>
+        <%- end -%>
+        <%- end -%>
+    end
+  end
+  <%- end -%>
+  <%- flags.each do |flag| -%>
+
+  # <%= flag.comment %>
+  module <%= flag.name %>
+    <%- flag.values.each_with_index do |value, index| -%>
+    # <%= value.comment %>
+    <%= value.name %> = 1 << <%= index + Prism::Template::COMMON_FLAGS_COUNT %>
+<%= "\n" if value != flag.values.last -%>
+    <%- end -%>
+  end
+  <%- end -%>
+
+  # The flags that are common to all nodes.
+  module NodeFlags
+    # A flag to indicate that the node is a candidate to emit a :line event
+    # through tracepoint when compiled.
+    NEWLINE = 1
+
+    # A flag to indicate that the value that the node represents is a value that
+    # can be determined at parse-time.
+    STATIC_LITERAL = 2
+  end
+end
diff --git a/prism/templates/lib/prism/reflection.rb.erb b/prism/templates/lib/prism/reflection.rb.erb
new file mode 100644
index 0000000000..6c8b2f4d25
--- /dev/null
+++ b/prism/templates/lib/prism/reflection.rb.erb
@@ -0,0 +1,136 @@
+module Prism
+  # The Reflection module provides the ability to reflect on the structure of
+  # the syntax tree itself, as opposed to looking at a single syntax tree. This
+  # is useful in metaprogramming contexts.
+  module Reflection
+    # A field represents a single piece of data on a node. It is the base class
+    # for all other field types.
+    class Field
+      # The name of the field, exactly as it was given to the constructor.
+      # Reflection.fields_for passes a Symbol here.
+      attr_reader :name
+
+      # Initializes the field with the given name.
+      def initialize(name)
+        @name = name
+      end
+    end
+
+    # A node field represents a single child node in the syntax tree. It
+    # resolves to a Prism::Node in Ruby.
+    class NodeField < Field
+    end
+
+    # An optional node field represents a single child node in the syntax tree
+    # that may or may not be present. It resolves to either a Prism::Node or nil
+    # in Ruby.
+    class OptionalNodeField < Field
+    end
+
+    # A node list field represents a list of child nodes in the syntax tree. It
+    # resolves to an array of Prism::Node instances in Ruby.
+    class NodeListField < Field
+    end
+
+    # A constant field represents a constant value on a node. Effectively, it
+    # represents an identifier found within the source. It resolves to a symbol
+    # in Ruby.
+    class ConstantField < Field
+    end
+
+    # An optional constant field represents a constant value on a node that may
+    # or may not be present. It resolves to either a symbol or nil in Ruby.
+    class OptionalConstantField < Field
+    end
+
+    # A constant list field represents a list of constant values on a node. It
+    # resolves to an array of symbols in Ruby.
+    class ConstantListField < Field
+    end
+
+    # A string field represents a string value on a node. It almost always
+    # represents the unescaped value of a string-like literal. It resolves to a
+    # string in Ruby.
+    class StringField < Field
+    end
+
+    # A location field represents the location of some part of the node in the
+    # source code. For example, the location of a keyword or an operator. It
+    # resolves to a Prism::Location in Ruby.
+    class LocationField < Field
+    end
+
+    # An optional location field represents the location of some part of the
+    # node in the source code that may or may not be present. It resolves to
+    # either a Prism::Location or nil in Ruby.
+    class OptionalLocationField < Field
+    end
+
+    # An integer field represents an integer value. It is used to represent the
+    # value of an integer literal, the depth of local variables, and the number
+    # of a numbered reference. It resolves to an Integer in Ruby.
+    class IntegerField < Field
+    end
+
+    # A float field represents a double-precision floating point value. It is
+    # used exclusively to represent the value of a floating point literal. It
+    # resolves to a Float in Ruby.
+    class FloatField < Field
+    end
+
+    # A flags field represents a bitset of flags on a node. It resolves to an
+    # integer in Ruby. Note that the flags cannot be accessed directly on the
+    # node because the integer is kept private. Instead, the various flags in
+    # the bitset should be accessed through their query methods.
+    class FlagsField < Field
+      # The names of the flags in the bitset. Reflection.fields_for passes an
+      # array of Symbols, each the downcased flag name followed by "?".
+      attr_reader :flags
+
+      # Initializes the flags field with the given name and flags.
+      def initialize(name, flags)
+        super(name)
+        @flags = flags
+      end
+    end
+
+    # Returns the fields for the given node.
+    #
+    # The argument only needs to respond to +type+ with one of the known node
+    # type symbols. The result is a new array of Field instances: a FlagsField
+    # first when the node type has flags, followed by one entry per field in
+    # declaration order.
+    #
+    # Raises a RuntimeError if the type symbol is not recognized.
+    def self.fields_for(node)
+      case node.type
+      <%- nodes.each do |node| -%>
+      when :<%= node.human %>
+        [<%= [*node.flags, *node.fields].map { |field|
+          case field
+          when Prism::Template::NodeField
+            "NodeField.new(:#{field.name})"
+          when Prism::Template::OptionalNodeField
+            "OptionalNodeField.new(:#{field.name})"
+          when Prism::Template::NodeListField
+            "NodeListField.new(:#{field.name})"
+          when Prism::Template::ConstantField
+            "ConstantField.new(:#{field.name})"
+          when Prism::Template::OptionalConstantField
+            "OptionalConstantField.new(:#{field.name})"
+          when Prism::Template::ConstantListField
+            "ConstantListField.new(:#{field.name})"
+          when Prism::Template::StringField
+            "StringField.new(:#{field.name})"
+          when Prism::Template::LocationField
+            "LocationField.new(:#{field.name})"
+          when Prism::Template::OptionalLocationField
+            "OptionalLocationField.new(:#{field.name})"
+          when Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField
+            "IntegerField.new(:#{field.name})"
+          when Prism::Template::DoubleField
+            "FloatField.new(:#{field.name})"
+          when Prism::Template::Flags
+            "FlagsField.new(:flags, [#{field.values.map { |value| ":#{value.name.downcase}?" }.join(", ")}])"
+          else
+            raise field.class.name
+          end
+        }.join(", ") %>]
+      <%- end -%>
+      else
+        raise "Unknown node type: #{node.type.inspect}"
+      end
+    end
+  end
+end
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
new file mode 100644
index 0000000000..6902df5c01
--- /dev/null
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -0,0 +1,602 @@
+require "stringio"
+require_relative "polyfill/unpack1"
+
+module Prism
+  # A module responsible for deserializing parse results.
+  module Serialize
+    # The major version of prism that we are expecting to find in the serialized
+    # strings.
+    MAJOR_VERSION = 1
+
+    # The minor version of prism that we are expecting to find in the serialized
+    # strings.
+    MINOR_VERSION = 8
+
+    # The patch version of prism that we are expecting to find in the serialized
+    # strings.
+    PATCH_VERSION = 0
+
+    # Deserialize the dumped output from a request to parse or parse_file.
+    #
+    # The formatting of the source of this method is purposeful to illustrate
+    # the structure of the serialized data.
+    #
+    # Returns a ParseResult. When +freeze+ is true the result, the copy of the
+    # input, and the source are frozen before returning. Raises if the header
+    # is invalid or if unread data remains after the constant pool.
+    def self.load_parse(input, serialized, freeze)
+      # Work on a copy because the encoding of this string is changed below.
+      input = input.dup
+      source = Source.for(input)
+      loader = Loader.new(source, serialized)
+
+                       loader.load_header
+      encoding =       loader.load_encoding
+      start_line =     loader.load_varsint
+      offsets =        loader.load_line_offsets(freeze)
+
+      source.replace_start_line(start_line)
+      source.replace_offsets(offsets)
+
+      comments =       loader.load_comments(freeze)
+      magic_comments = loader.load_magic_comments(freeze)
+      data_loc =       loader.load_optional_location_object(freeze)
+      errors =         loader.load_errors(encoding, freeze)
+      warnings =       loader.load_warnings(encoding, freeze)
+      cpool_base =     loader.load_uint32
+      cpool_size =     loader.load_varuint
+
+      constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+
+      node =           loader.load_node(constant_pool, encoding, freeze)
+                       loader.load_constant_pool(constant_pool)
+      raise unless     loader.eof?
+
+      result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source)
+      result.freeze if freeze
+
+      input.force_encoding(encoding)
+
+      # This is an extremely niche use-case where the file was marked as binary
+      # but it contained UTF-8-encoded characters. In that case we will actually
+      # put it back to UTF-8 to give the location APIs the best chance of being
+      # correct.
+      if !input.ascii_only? && input.encoding == Encoding::BINARY
+        input.force_encoding(Encoding::UTF_8)
+        input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
+      end
+
+      if freeze
+        input.freeze
+        source.deep_freeze
+      end
+
+      result
+    end
+
+    # Deserialize the dumped output from a request to lex or lex_file.
+    #
+    # The formatting of the source of this method is purposeful to illustrate
+    # the structure of the serialized data.
+    #
+    # Unlike load_parse, no header is read here: the stream starts with the
+    # tokens and continues directly with the encoding.
+    #
+    # Returns a LexResult. When +freeze+ is true the tokens, the source, and
+    # the result are frozen before returning.
+    def self.load_lex(input, serialized, freeze)
+      source = Source.for(input)
+      loader = Loader.new(source, serialized)
+
+      tokens =         loader.load_tokens
+      encoding =       loader.load_encoding
+      start_line =     loader.load_varsint
+      offsets =        loader.load_line_offsets(freeze)
+
+      source.replace_start_line(start_line)
+      source.replace_offsets(offsets)
+
+      comments =       loader.load_comments(freeze)
+      magic_comments = loader.load_magic_comments(freeze)
+      data_loc =       loader.load_optional_location_object(freeze)
+      errors =         loader.load_errors(encoding, freeze)
+      warnings =       loader.load_warnings(encoding, freeze)
+      raise unless     loader.eof?
+
+      result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source)
+
+      # The tokens are read before the encoding is known, so their values are
+      # tagged with it only now.
+      tokens.each do |token|
+        token[0].value.force_encoding(encoding)
+
+        if freeze
+          token[0].deep_freeze
+          token.freeze
+        end
+      end
+
+      if freeze
+        source.deep_freeze
+        tokens.freeze
+        result.freeze
+      end
+
+      result
+    end
+
+    # Deserialize the dumped output from a request to parse_comments or
+    # parse_file_comments.
+    #
+    # The formatting of the source of this method is purposeful to illustrate
+    # the structure of the serialized data.
+    #
+    # Returns the array of comments. The encoding is read but its value is not
+    # used here. When +freeze+ is true the source is frozen as well.
+    def self.load_parse_comments(input, serialized, freeze)
+      source = Source.for(input)
+      loader = Loader.new(source, serialized)
+
+                   loader.load_header
+                   loader.load_encoding
+      start_line = loader.load_varsint
+
+      source.replace_start_line(start_line)
+
+      result =     loader.load_comments(freeze)
+      raise unless loader.eof?
+
+      source.deep_freeze if freeze
+      result
+    end
+
+    # Deserialize the dumped output from a request to parse_lex or
+    # parse_lex_file.
+    #
+    # The formatting of the source of this method is purposeful to illustrate
+    # the structure of the serialized data.
+    #
+    # The stream holds the tokens first, followed by the same layout that
+    # load_parse reads. Returns a ParseLexResult whose value is the pair
+    # [node, tokens]. When +freeze+ is true the tokens, the pair, the source,
+    # and the result are frozen before returning.
+    def self.load_parse_lex(input, serialized, freeze)
+      source = Source.for(input)
+      loader = Loader.new(source, serialized)
+
+      tokens =         loader.load_tokens
+                       loader.load_header
+      encoding =       loader.load_encoding
+      start_line =     loader.load_varsint
+      offsets =        loader.load_line_offsets(freeze)
+
+      source.replace_start_line(start_line)
+      source.replace_offsets(offsets)
+
+      comments =       loader.load_comments(freeze)
+      magic_comments = loader.load_magic_comments(freeze)
+      data_loc =       loader.load_optional_location_object(freeze)
+      errors =         loader.load_errors(encoding, freeze)
+      warnings =       loader.load_warnings(encoding, freeze)
+      cpool_base =     loader.load_uint32
+      cpool_size =     loader.load_varuint
+
+      constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+
+      node =           loader.load_node(constant_pool, encoding, freeze)
+                       loader.load_constant_pool(constant_pool)
+      raise unless     loader.eof?
+
+      value = [node, tokens]
+      result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)
+
+      # The tokens are read before the encoding is known, so their values are
+      # tagged with it only now.
+      tokens.each do |token|
+        token[0].value.force_encoding(encoding)
+
+        if freeze
+          token[0].deep_freeze
+          token.freeze
+        end
+      end
+
+      if freeze
+        source.deep_freeze
+        tokens.freeze
+        value.freeze
+        result.freeze
+      end
+
+      result
+    end
+
+    class ConstantPool # :nodoc:
+      attr_reader :size
+
+      def initialize(input, serialized, base, size)
+        @input = input
+        @serialized = serialized
+        @base = base
+        @size = size
+        @pool = Array.new(size, nil)
+      end
+
+      def get(index, encoding)
+        @pool[index] ||=
+          begin
+            offset = @base + index * 8
+            start = @serialized.unpack1("L", offset: offset)
+            length = @serialized.unpack1("L", offset: offset + 4)
+
+            if start.nobits?(1 << 31)
+              @input.byteslice(start, length).force_encoding(encoding).to_sym
+            else
+              @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(encoding).to_sym
+            end
+          end
+      end
+    end
+
+    if RUBY_ENGINE == "truffleruby"
+      # StringIO is synchronized and that adds a high overhead on TruffleRuby.
+      # This replacement implements only the methods that Loader calls on its
+      # io: pos, pos=, getbyte, read, and eof?.
+      class FastStringIO # :nodoc:
+        attr_accessor :pos
+
+        def initialize(string)
+          @string = string
+          @pos = 0
+        end
+
+        # Returns the byte at the current position and advances the position
+        # by one, whether or not a byte was available there.
+        def getbyte
+          byte = @string.getbyte(@pos)
+          @pos += 1
+          byte
+        end
+
+        # Returns the next n bytes and advances the position by n.
+        def read(n)
+          slice = @string.byteslice(@pos, n)
+          @pos += n
+          slice
+        end
+
+        # True once the position has reached or passed the end of the string.
+        def eof?
+          @pos >= @string.bytesize
+        end
+      end
+    else
+      FastStringIO = ::StringIO # :nodoc:
+    end
+
+    class Loader # :nodoc:
+      attr_reader :input, :io, :source
+
+      def initialize(source, serialized)
+        @input = source.source.dup
+        raise unless serialized.encoding == Encoding::BINARY
+        @io = FastStringIO.new(serialized)
+        @source = source
+        define_load_node_lambdas if RUBY_ENGINE != "ruby"
+      end
+
+      # Consumes one byte and then reports whether the io has reached its end.
+      # NOTE(review): the consumed byte is presumably a trailing terminator
+      # written by the serializer -- confirm against the C side.
+      def eof?
+        io.getbyte
+        io.eof?
+      end
+
+      def load_constant_pool(constant_pool)
+        trailer = 0
+
+        constant_pool.size.times do |index|
+          start, length = io.read(8).unpack("L2")
+          trailer += length if start.anybits?(1 << 31)
+        end
+
+        io.read(trailer)
+      end
+
+      def load_header
+        raise "Invalid serialization" if io.read(5) != "PRISM"
+        raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
+        raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0
+      end
+
+      def load_encoding
+        encoding = Encoding.find(io.read(load_varuint))
+        @input = input.force_encoding(encoding).freeze
+        encoding
+      end
+
+      def load_line_offsets(freeze)
+        offsets = Array.new(load_varuint) { load_varuint }
+        offsets.freeze if freeze
+        offsets
+      end
+
+      def load_comments(freeze)
+        comments =
+          Array.new(load_varuint) do
+            comment =
+              case load_varuint
+              when 0 then InlineComment.new(load_location_object(freeze))
+              when 1 then EmbDocComment.new(load_location_object(freeze))
+              end
+
+            comment.freeze if freeze
+            comment
+          end
+
+        comments.freeze if freeze
+        comments
+      end
+
+      def load_magic_comments(freeze)
+        magic_comments =
+          Array.new(load_varuint) do
+            magic_comment =
+              MagicComment.new(
+                load_location_object(freeze),
+                load_location_object(freeze)
+              )
+
+            magic_comment.freeze if freeze
+            magic_comment
+          end
+
+        magic_comments.freeze if freeze
+        magic_comments
+      end
+
+      # Diagnostic type symbols, looked up by the varuint read for each error
+      # and warning. All errors are listed first, followed by all warnings.
+      DIAGNOSTIC_TYPES = [
+        <%- errors.each do |error| -%>
+        <%= error.name.downcase.to_sym.inspect %>,
+        <%- end -%>
+        <%- warnings.each do |warning| -%>
+        <%= warning.name.downcase.to_sym.inspect %>,
+        <%- end -%>
+      ].freeze
+
+      private_constant :DIAGNOSTIC_TYPES
+
+      def load_error_level
+        level = io.getbyte
+
+        case level
+        when 0
+          :syntax
+        when 1
+          :argument
+        when 2
+          :load
+        else
+          raise "Unknown level: #{level}"
+        end
+      end
+
+      def load_errors(encoding, freeze)
+        errors =
+          Array.new(load_varuint) do
+            error =
+              ParseError.new(
+                DIAGNOSTIC_TYPES.fetch(load_varuint),
+                load_embedded_string(encoding),
+                load_location_object(freeze),
+                load_error_level
+              )
+
+            error.freeze if freeze
+            error
+          end
+
+        errors.freeze if freeze
+        errors
+      end
+
+      def load_warning_level
+        level = io.getbyte
+
+        case level
+        when 0
+          :default
+        when 1
+          :verbose
+        else
+          raise "Unknown level: #{level}"
+        end
+      end
+
+      def load_warnings(encoding, freeze)
+        warnings =
+          Array.new(load_varuint) do
+            warning =
+              ParseWarning.new(
+                DIAGNOSTIC_TYPES.fetch(load_varuint),
+                load_embedded_string(encoding),
+                load_location_object(freeze),
+                load_warning_level
+              )
+
+            warning.freeze if freeze
+            warning
+          end
+
+        warnings.freeze if freeze
+        warnings
+      end
+
+      def load_tokens
+        tokens = []
+
+        while (type = TOKEN_TYPES.fetch(load_varuint))
+          start = load_varuint
+          length = load_varuint
+          lex_state = load_varuint
+
+          location = Location.new(@source, start, length)
+          token = Token.new(@source, type, location.slice, location)
+
+          tokens << [token, lex_state]
+        end
+
+        tokens
+      end
+
+      # variable-length integer using https://en.wikipedia.org/wiki/LEB128
+      # This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
+      def load_varuint
+        n = io.getbyte
+        if n < 128
+          n
+        else
+          n -= 128
+          shift = 0
+          while (b = io.getbyte) >= 128
+            n += (b - 128) << (shift += 7)
+          end
+          n + (b << (shift + 7))
+        end
+      end
+
+      def load_varsint
+        n = load_varuint
+        (n >> 1) ^ (-(n & 1))
+      end
+
+      def load_integer
+        negative = io.getbyte != 0
+        length = load_varuint
+
+        value = 0
+        length.times { |index| value |= (load_varuint << (index * 32)) }
+
+        value = -value if negative
+        value
+      end
+
+      # Reads 8 bytes and unpacks them with the "D" directive (a
+      # double-precision float in native format).
+      def load_double
+        io.read(8).unpack1("D")
+      end
+
+      # Reads 4 bytes and unpacks them with the "L" directive (a 32-bit
+      # unsigned integer in native byte order). Unlike load_varuint this is a
+      # fixed-width read.
+      def load_uint32
+        io.read(4).unpack1("L")
+      end
+
+      def load_optional_node(constant_pool, encoding, freeze)
+        if io.getbyte != 0
+          io.pos -= 1
+          load_node(constant_pool, encoding, freeze)
+        end
+      end
+
+      # Reads a varuint length and then that many bytes from the serialized
+      # stream, returning them as a frozen string in the given encoding.
+      def load_embedded_string(encoding)
+        io.read(load_varuint).force_encoding(encoding).freeze
+      end
+
+      def load_string(encoding)
+        case (type = io.getbyte)
+        when 1
+          input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze
+        when 2
+          load_embedded_string(encoding)
+        else
+          raise "Unknown serialized string type: #{type}"
+        end
+      end
+
+      def load_location_object(freeze)
+        location = Location.new(source, load_varuint, load_varuint)
+        location.freeze if freeze
+        location
+      end
+
+      def load_location(freeze)
+        return load_location_object(freeze) if freeze
+        (load_varuint << 32) | load_varuint
+      end
+
+      def load_optional_location(freeze)
+        load_location(freeze) if io.getbyte != 0
+      end
+
+      def load_optional_location_object(freeze)
+        load_location_object(freeze) if io.getbyte != 0
+      end
+
+      def load_constant(constant_pool, encoding)
+        index = load_varuint
+        constant_pool.get(index - 1, encoding)
+      end
+
+      def load_optional_constant(constant_pool, encoding)
+        index = load_varuint
+        constant_pool.get(index - 1, encoding) if index != 0
+      end
+
+      if RUBY_ENGINE == "ruby"
+        # Reads one node. The stream holds a type byte, the node id, and the
+        # location; the type byte selects the generated branch below, which
+        # reads a varuint that is passed as the fourth constructor argument
+        # and then each field in declaration order. For node types that
+        # report needs_serialized_length?, a uint32 is read and discarded
+        # first. The node is frozen when +freeze+ is true.
+        #
+        # A type byte that matches no branch leaves the result nil.
+        def load_node(constant_pool, encoding, freeze)
+          type = io.getbyte
+          node_id = load_varuint
+          location = load_location(freeze)
+          value = case type
+          <%- nodes.each_with_index do |node, index| -%>
+          when <%= index + 1 %> then
+            <%- if node.needs_serialized_length? -%>
+            load_uint32
+            <%- end -%>
+            <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
+              case field
+              when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
+              when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
+              when Prism::Template::StringField then "load_string(encoding)"
+              when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }.tap { |nodes| nodes.freeze if freeze }"
+              when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
+              when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
+              when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze }"
+              when Prism::Template::LocationField then "load_location(freeze)"
+              when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
+              when Prism::Template::UInt8Field then "io.getbyte"
+              when Prism::Template::UInt32Field then "load_varuint"
+              when Prism::Template::IntegerField then "load_integer"
+              when Prism::Template::DoubleField then "load_double"
+              else raise
+              end
+            }].join(", ") -%>)
+            <%- end -%>
+          end
+
+          value.freeze if freeze
+          value
+        end
+      else
+        # Reads one node by dispatching on the type byte to the lambda stored
+        # at that index in @load_node_lambdas.
+        def load_node(constant_pool, encoding, freeze)
+          @load_node_lambdas[io.getbyte].call(constant_pool, encoding, freeze)
+        end
+
+        def define_load_node_lambdas
+          @load_node_lambdas = [
+            nil,
+            <%- nodes.each do |node| -%>
+            -> (constant_pool, encoding, freeze) {
+              node_id = load_varuint
+              location = load_location(freeze)
+              <%- if node.needs_serialized_length? -%>
+              load_uint32
+              <%- end -%>
+              value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
+                case field
+                when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
+                when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
+                when Prism::Template::StringField then "load_string(encoding)"
+                when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }"
+                when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
+                when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
+                when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }"
+                when Prism::Template::LocationField then "load_location(freeze)"
+                when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
+                when Prism::Template::UInt8Field then "io.getbyte"
+                when Prism::Template::UInt32Field then "load_varuint"
+                when Prism::Template::IntegerField then "load_integer"
+                when Prism::Template::DoubleField then "load_double"
+                else raise
+                end
+              }].join(", ") -%>)
+              value.freeze if freeze
+              value
+            },
+            <%- end -%>
+          ]
+        end
+      end
+    end
+
+    # The token types that can be indexed by their enum values. Index 0 is nil,
+    # which is the entry that makes Loader#load_tokens stop reading.
+    TOKEN_TYPES = [
+      nil,
+      <%- tokens.each do |token| -%>
+      <%= token.name.to_sym.inspect %>,
+      <%- end -%>
+    ].freeze
+
+    private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION
+    private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES
+  end
+
+  private_constant :Serialize
+end
diff --git a/prism/templates/lib/prism/visitor.rb.erb b/prism/templates/lib/prism/visitor.rb.erb
new file mode 100644
index 0000000000..76f907724f
--- /dev/null
+++ b/prism/templates/lib/prism/visitor.rb.erb
@@ -0,0 +1,55 @@
+module Prism
+  # A class that knows how to walk down the tree. None of the individual visit
+  # methods are implemented on this visitor, so it forces the consumer to
+  # implement each one that they need. For a default implementation that
+  # continues walking the tree, see the Visitor class.
+  class BasicVisitor
+    # Calls `accept` on the given node if it is not `nil`, which in turn should
+    # call back into this visitor by calling the appropriate `visit_*` method.
+    def visit(node)
+      # @type self: _Visitor
+      node&.accept(self)
+    end
+
+    # Visits each node in `nodes` by calling `accept` on each one.
+    def visit_all(nodes)
+      # @type self: _Visitor
+      nodes.each { |node| node&.accept(self) }
+    end
+
+    # Visits the child nodes of `node` by calling `accept` on each one.
+    def visit_child_nodes(node)
+      # @type self: _Visitor
+      node.each_child_node { |node| node.accept(self) }
+    end
+  end
+
+  # A visitor is a class that provides a default implementation for every accept
+  # method defined on the nodes. This means it can walk a tree without the
+  # caller needing to define any special handling. This allows you to handle a
+  # subset of the tree, while still walking the whole tree.
+  #
+  # For example, to find all of the method calls that call the `foo` method, you
+  # could write:
+  #
+  #     class FooCalls < Prism::Visitor
+  #       def visit_call_node(node)
+  #         if node.name == :foo
+  #           # Do something with the node
+  #         end
+  #
+  #         # Call super so that the visitor continues walking the tree
+  #         super
+  #       end
+  #     end
+  #
+  class Visitor < BasicVisitor
+    <%- nodes.each_with_index do |node, index| -%>
+<%= "\n" if index != 0 -%>
+    # Visit a <%= node.name %> node
+    def visit_<%= node.human %>(node)
+      node.each_child_node { |node| node.accept(self) }
+    end
+    <%- end -%>
+  end
+end
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
new file mode 100644
index 0000000000..121dd4b2b6
--- /dev/null
+++ b/prism/templates/src/diagnostic.c.erb
@@ -0,0 +1,526 @@
+#include "prism/diagnostic.h"
+
+#define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %> /* errors + warnings; length of diagnostic_messages */
+
+/** Static metadata stored in the lookup table for a single diagnostic id. */
+typedef struct {
+    /** Message text for the diagnostic; may contain format specifiers like %s. */
+    const char *message;
+
+    /** Level of the diagnostic (a PM_ERROR_LEVEL_* or PM_WARNING_LEVEL_* value). */
+    uint8_t level;
+} pm_diagnostic_data_t;
+
+/**
+ * ## Message composition
+ *
+ * When composing an error message, use sentence fragments.
+ *
+ * Try describing the property of the code that caused the error, rather than
+ * the rule that is being violated. It may help to use a fragment that completes
+ * a sentence beginning, "the parser encountered (a) ...". If appropriate, add a
+ * description of the rule violation (or other helpful context) after a
+ * semicolon.
+ *
+ * For example, instead of "control escape sequence cannot be doubled", prefer:
+ *
+ * > "invalid control escape sequence; control cannot be repeated"
+ *
+ * In some cases, where the failure is more general or syntax expectations are
+ * violated, it may make more sense to use a fragment that completes a sentence
+ * beginning, "the parser ...".
+ *
+ * For example:
+ *
+ * > "expected an expression after `(`"
+ * > "cannot parse the expression"
+ *
+ * ## Message style guide
+ *
+ * - Use articles like "a", "an", and "the" when appropriate.
+ *   - e.g., prefer "cannot parse the expression" to "cannot parse expression".
+ * - Use the common name for tokens and nodes.
+ *   - e.g., prefer "keyword splat" to "assoc splat"
+ *   - e.g., prefer "embedded document" to "embdoc"
+ * - Do not capitalize the initial word of the message.
+ * - Use backticks around token literals.
+ *   - e.g., "expected a `=>` between the hash key and value"
+ * - Do not use `.` or other punctuation at the end of the message.
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
+ *   - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
+ *
+ * ## Error names (PM_ERR_*)
+ *
+ * - When appropriate, prefer node name to token name.
+ *   - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
+ * - Prefer token name to common name.
+ *   - e.g., prefer "STAR" to "ASTERISK".
+ * - Try to order the words in the name from more general to more specific,
+ *   - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
+ *   - When in doubt, look for similar patterns and name them so that they are grouped when lexically
+ *     sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
+ *
+ * ## Level
+ *
+ * For errors, the levels are:
+ *
+ * * `PM_ERROR_LEVEL_SYNTAX` - Errors that should raise SyntaxError.
+ * * `PM_ERROR_LEVEL_ARGUMENT` - Errors that should raise ArgumentError.
+ * * `PM_ERROR_LEVEL_LOAD` - Errors that should raise LoadError.
+ *
+ * For warnings, the levels are:
+ *
+ * * `PM_WARNING_LEVEL_DEFAULT` - Warnings that appear for `ruby -c -e 'code'`.
+ * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
+ */
+static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
+    // Special error that can be replaced
+    [PM_ERR_CANNOT_PARSE_EXPRESSION]            = { "cannot parse the expression", PM_ERROR_LEVEL_SYNTAX },
+
+    // Errors that should raise argument errors
+    [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT]     = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
+
+    // Errors that should raise load errors
+    [PM_ERR_SCRIPT_NOT_FOUND]                   = { "no Ruby script found in input", PM_ERROR_LEVEL_LOAD },
+
+    // Errors that should raise syntax errors
+    [PM_ERR_ALIAS_ARGUMENT]                     = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE]  = { "invalid argument being passed to `alias`; can't make alias for the number variables", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_AMPAMPEQ_MULTI_ASSIGN]              = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_AFTER_BLOCK]               = { "unexpected argument after a block argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES] = { "unexpected argument after `...`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_BARE_HASH]                 = { "unexpected bare hash argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_BLOCK_MULTI]               = { "both block arg and actual block given; only one block is allowed", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_CONFLICT_AMPERSAND]        = { "unexpected `&`; anonymous block parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_CONFLICT_STAR]             = { "unexpected `*`; anonymous rest parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_CONFLICT_STAR_STAR]        = { "unexpected `**`; anonymous keyword rest parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_FORMAL_CLASS]              = { "invalid formal argument; formal argument cannot be a class variable", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_FORMAL_CONSTANT]           = { "invalid formal argument; formal argument cannot be a constant", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_FORMAL_GLOBAL]             = { "invalid formal argument; formal argument cannot be a global variable", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_FORMAL_IVAR]               = { "invalid formal argument; formal argument cannot be an instance variable", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_FORWARDING_UNBOUND]        = { "unexpected `...` in an non-parenthesized call", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND]   = { "unexpected `&`; no anonymous block parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES]    = { "unexpected ... when the parent method is not forwarding", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_NO_FORWARDING_STAR]        = { "unexpected `*`; no anonymous rest parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR]   = { "unexpected `**`; no anonymous keyword rest parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT]   = { "unexpected `*` splat argument after a `**` keyword splat argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT]         = { "unexpected `*` splat argument after a `*` splat argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_TERM_PAREN]                = { "unexpected %s; expected a `)` to close the arguments", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARGUMENT_UNEXPECTED_BLOCK]          = { "unexpected '{' after a method call without parenthesis", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARRAY_ELEMENT]                      = { "expected an element for the array", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARRAY_EXPRESSION]                   = { "expected an expression for the array element", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARRAY_EXPRESSION_AFTER_STAR]        = { "expected an expression after `*` in the array", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARRAY_SEPARATOR]                    = { "unexpected %s; expected a `,` separator for the array elements", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ARRAY_TERM]                         = { "unexpected %s; expected a `]` to close the array", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BEGIN_LONELY_ELSE]                  = { "unexpected `else` in `begin` block; else without rescue is useless", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BEGIN_TERM]                         = { "expected an `end` to close the `begin` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BEGIN_UPCASE_BRACE]                 = { "expected a `{` after `BEGIN`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BEGIN_UPCASE_TERM]                  = { "expected a `}` to close the `BEGIN` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BEGIN_UPCASE_TOPLEVEL]              = { "BEGIN is permitted only at toplevel", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE]         = { "expected a local variable name in the block parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BLOCK_PARAM_PIPE_TERM]              = { "expected the block parameters to end with `|`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BLOCK_TERM_BRACE]                   = { "expected a block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_BLOCK_TERM_END]                     = { "expected a block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CANNOT_PARSE_STRING_PART]           = { "cannot parse the string part", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CASE_EXPRESSION_AFTER_CASE]         = { "expected an expression after `case`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CASE_EXPRESSION_AFTER_WHEN]         = { "expected an expression after `when`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CASE_MATCH_MISSING_PREDICATE]       = { "expected a predicate for a case matching statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CASE_MISSING_CONDITIONS]            = { "expected a `when` or `in` clause after `case`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CASE_TERM]                          = { "expected an `end` to close the `case` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CLASS_IN_METHOD]                    = { "unexpected class definition in method body", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CLASS_NAME]                         = { "unexpected constant path after `class`; class/module name must be CONSTANT", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CLASS_SUPERCLASS]                   = { "expected a superclass after `<`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CLASS_TERM]                         = { "expected an `end` to close the `class` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CLASS_UNEXPECTED_END]               = { "unexpected `end`, expecting ';' or '\\n'", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CLASS_VARIABLE_BARE]                = { "'@@' without identifiers is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_ELSIF_PREDICATE]        = { "expected a predicate expression for the `elsif` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_IF_PREDICATE]           = { "expected a predicate expression for the `if` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_PREDICATE_TERM]         = { "expected `then` or `;` or '\\n'", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_TERM]                   = { "expected an `end` to close the conditional clause", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_TERM_ELSE]              = { "expected an `end` to close the `else` clause", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_UNLESS_PREDICATE]       = { "expected a predicate expression for the `unless` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_UNTIL_PREDICATE]        = { "expected a predicate expression for the `until` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONDITIONAL_WHILE_PREDICATE]        = { "expected a predicate expression for the `while` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = { "expected a constant after the `::` operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_ENDLESS]                        = { "could not parse the endless method body", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_ENDLESS_PARAMETERS]             = { "could not parse the endless method parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_ENDLESS_SETTER]                 = { "invalid method name; a setter method cannot be defined in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_NAME]                           = { "unexpected %s; expected a method name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_PARAMS_TERM]                    = { "expected a delimiter to close the parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_PARAMS_TERM_PAREN]              = { "unexpected %s; expected a `)` to close the parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_RECEIVER]                       = { "expected a receiver for the method definition", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_RECEIVER_TERM]                  = { "expected a `.` or `::` after the receiver in a method definition", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEF_TERM]                           = { "expected an `end` to close the `def` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_DEFINED_EXPRESSION]                 = { "expected an expression after `defined?`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EMBDOC_TERM]                        = { "embedded document meets end of file", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EMBEXPR_END]                        = { "expected a `}` to close the embedded expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EMBVAR_INVALID]                     = { "invalid embedded variable", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_END_UPCASE_BRACE]                   = { "expected a `{` after `END`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_END_UPCASE_TERM]                    = { "expected a `}` to close the `END` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_CONTROL]             = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT]      = { "invalid control escape sequence; control cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_HEXADECIMAL]         = { "invalid hex escape sequence", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_META]                = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_META_REPEAT]         = { "invalid meta escape sequence; meta cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE]             = { "invalid Unicode escape sequence", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS]    = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_LIST]        = { "invalid Unicode list: %.*s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL]     = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_LONG]        = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_SHORT]       = { "too short escape sequence: %.*s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_ESCAPE_INVALID_UNICODE_TERM]        = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_ARGUMENT]                    = { "unexpected %s; expected an argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EOL_AFTER_STATEMENT]         = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ]   = { "expected an expression after `&&=`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = { "expected an expression after `||=`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA]      = { "expected an expression after `,`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL]      = { "expected an expression after `=`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS]  = { "expected an expression after `<<`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN]     = { "expected an expression after `(`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR]   = { "unexpected %s; expected an expression after the operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT]      = { "expected an expression after `*` splat in an argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = { "expected an expression after `**` in a hash", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_EXPRESSION_AFTER_STAR]       = { "expected an expression after `*`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_FOR_DELIMITER]               = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_IDENT_REQ_PARAMETER]         = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_IN_DELIMITER]                = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN]     = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER]      = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER]        = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_MESSAGE]                     = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_RBRACKET]                    = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_RPAREN]                      = { "expected a matching `)`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_RPAREN_AFTER_MULTI]          = { "expected a `)` after multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_RPAREN_REQ_PARAMETER]        = { "expected a `)` to end a required parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER]   = { "unexpected %s; expected a newline or a ';' after the singleton class", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_STRING_CONTENT]              = { "expected string content after opening string delimiter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPECT_WHEN_DELIMITER]              = { "expected a delimiter after the predicates of a `when` clause", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_BARE_HASH]               = { "unexpected bare hash in expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE]            = { "unexpected '='; target cannot be written", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING]   = { "Can't assign to __ENCODING__", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE]      = { "Can't assign to false", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_FILE]       = { "Can't assign to __FILE__", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_LINE]       = { "Can't assign to __LINE__", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_NIL]        = { "Can't assign to nil", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED]   = { "Can't assign to numbered parameter %.2s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_SELF]       = { "Can't change the value of self", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE]       = { "Can't assign to true", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_FLOAT_PARSE]                        = { "could not parse the float '%.*s'", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_FOR_COLLECTION]                     = { "expected a collection after the `in` in a `for` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_FOR_INDEX]                          = { "expected an index after `for`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_FOR_IN]                             = { "expected an `in` after the index in a `for` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_FOR_TERM]                           = { "expected an `end` to close the `for` loop", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_GLOBAL_VARIABLE_BARE]               = { "'$' without identifiers is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HASH_EXPRESSION_AFTER_LABEL]        = { "expected an expression after the label in a hash", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HASH_KEY]                           = { "unexpected %s, expecting '}' or a key in the hash literal", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HASH_ROCKET]                        = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HASH_TERM]                          = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HASH_VALUE]                         = { "unexpected %s; expected a value in the hash literal", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HEREDOC_IDENTIFIER]                 = { "unterminated here document identifier", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_HEREDOC_TERM]                       = { "unterminated heredoc; can't find string \"%.*s\" anywhere before EOF", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INCOMPLETE_QUESTION_MARK]           = { "incomplete expression at `?`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3]      = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INCOMPLETE_VARIABLE_CLASS]          = { "'%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3]   = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE]       = { "'%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INSTANCE_VARIABLE_BARE]             = { "'@' without identifiers is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_BLOCK_EXIT]                 = { "Invalid %s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_COMMA]                      = { "invalid comma", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_ESCAPE_CHARACTER]           = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_FLOAT_EXPONENT]             = { "invalid exponent", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_LOCAL_VARIABLE_READ]        = { "identifier %.*s is not valid to get", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_LOCAL_VARIABLE_WRITE]       = { "identifier %.*s is not valid to set", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_BINARY]              = { "invalid binary number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_DECIMAL]             = { "invalid decimal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_FRACTION]            = { "unexpected fraction part after numeric literal", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_HEXADECIMAL]         = { "invalid hexadecimal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_OCTAL]               = { "invalid octal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER]    = { "invalid underscore placement in number", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING] = { "trailing '_' in number", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_CHARACTER]                  = { "Invalid char '\\x%02X' in expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_MULTIBYTE_CHAR]             = { "invalid multibyte char (%s)", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_MULTIBYTE_CHARACTER]        = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_MULTIBYTE_ESCAPE]           = { "invalid multibyte escape: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_PRINTABLE_CHARACTER]        = { "invalid character `%c`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_PERCENT]                    = { "unknown type of %string", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_PERCENT_EOF]                = { "unterminated quoted string meets end of file", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_RETRY_AFTER_ELSE]           = { "Invalid retry after else", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_RETRY_AFTER_ENSURE]         = { "Invalid retry after ensure", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_RETRY_WITHOUT_RESCUE]       = { "Invalid retry without rescue", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_SYMBOL]                     = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_VARIABLE_GLOBAL_3_3]        = { "`%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_VARIABLE_GLOBAL]            = { "'%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_INVALID_YIELD]                      = { "Invalid yield", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_IT_NOT_ALLOWED_NUMBERED]            = { "'it' is not allowed when a numbered parameter is already used", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_IT_NOT_ALLOWED_ORDINARY]            = { "'it' is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LAMBDA_OPEN]                        = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LAMBDA_TERM_BRACE]                  = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LAMBDA_TERM_END]                    = { "expected a lambda block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_I_LOWER_ELEMENT]               = { "expected a symbol in a `%i` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_I_LOWER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%i`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_I_UPPER_ELEMENT]               = { "expected a symbol in a `%I` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_I_UPPER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%I`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_W_LOWER_ELEMENT]               = { "expected a string in a `%w` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_W_LOWER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%w`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_W_UPPER_ELEMENT]               = { "expected a string in a `%W` list", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_LIST_W_UPPER_TERM]                  = { "unterminated list; expected a closing delimiter for the `%W`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MALLOC_FAILED]                      = { "failed to allocate memory", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MIXED_ENCODING]                     = { "UTF-8 mixed within %s source", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MODULE_IN_METHOD]                   = { "unexpected module definition in method body", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MODULE_NAME]                        = { "unexpected constant path after `module`; class/module name must be CONSTANT", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MODULE_TERM]                        = { "expected an `end` to close the `module` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS]          = { "multiple splats in multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST]       = { "unexpected '%.*s' resulting in multiple splats in multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NESTING_TOO_DEEP]                   = { "nesting too deep", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NO_LOCAL_VARIABLE]                  = { "%.*s: no such local variable", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NON_ASSOCIATIVE_OPERATOR]           = { "unexpected %s; %s is a non-associative operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NOT_EXPRESSION]                     = { "expected an expression after `not`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NUMBER_LITERAL_UNDERSCORE]          = { "number literal ending with a `_`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK]     = { "numbered parameter is already used in inner block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NUMBERED_PARAMETER_IT]              = { "numbered parameters are not allowed when 'it' is already used", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NUMBERED_PARAMETER_ORDINARY]        = { "numbered parameters are not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK]     = { "numbered parameter is already used in outer block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_OPERATOR_MULTI_ASSIGN]              = { "unexpected operator for a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_OPERATOR_WRITE_ARGUMENTS]           = { "unexpected operator after a call with arguments", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_OPERATOR_WRITE_BLOCK]               = { "unexpected operator after a call with a block", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI]        = { "unexpected multiple `**` splat parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_BLOCK_MULTI]              = { "multiple block parameters; only one block is allowed", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_CIRCULAR]                 = { "circular argument reference - %.*s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_FORWARDING_AFTER_REST]    = { "... after rest argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_METHOD_NAME]              = { "unexpected name for a parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_NAME_DUPLICATED]          = { "duplicated argument name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_NO_DEFAULT]               = { "expected a default value for the parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_NO_DEFAULT_KW]            = { "expected a default value for the keyword parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_NUMBERED_RESERVED]        = { "%.2s is reserved for numbered parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_ORDER]                    = { "unexpected parameter order", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_SPLAT_MULTI]              = { "unexpected multiple `*` splat parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_STAR]                     = { "unexpected parameter `*`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_UNEXPECTED_FWD]           = { "unexpected `...` in parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_WILD_LOOSE_COMMA]         = { "unexpected `,` in parameters", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PARAMETER_UNEXPECTED_NO_KW]         = { "unexpected **nil; no keywords marker disallowed after keywords", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS]       = { "unexpected multiple '*' rest patterns in an array pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_CAPTURE_DUPLICATE]          = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE]     = { "variable capture in alternative pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET]   = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA]     = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET]   = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_IN]        = { "expected a pattern expression after the `in` keyword", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_KEY]       = { "expected a pattern expression after the key", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN]     = { "expected a pattern expression after the `(` operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_PIN]       = { "expected a pattern expression after the `^` pin operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE]      = { "expected a pattern expression after the `|` operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE]     = { "expected a pattern expression after the range operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_EXPRESSION_AFTER_REST]      = { "unexpected pattern expression after the `**` expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_FIND_MISSING_INNER]         = { "find patterns need at least one required inner pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_IMPLICIT]              = { "unexpected implicit hash in pattern; use '{' to delineate", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_KEY]                   = { "unexpected %s; expected a key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_KEY_DUPLICATE]         = { "duplicated key name", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_KEY_INTERPOLATED]      = { "symbol literal with interpolation is not allowed", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_KEY_LABEL]             = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_HASH_KEY_LOCALS]            = { "key must be valid as local variables", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_IDENT_AFTER_HROCKET]        = { "expected an identifier after the `=>` operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_LABEL_AFTER_COMMA]          = { "expected a label after the `,` in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_REST]                       = { "unexpected rest pattern", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_TERM_BRACE]                 = { "expected a `}` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_TERM_BRACKET]               = { "expected a `]` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PATTERN_TERM_PAREN]                 = { "expected a `)` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN]            = { "unexpected `||=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH]    = { "regexp encoding option '%c' differs from source encoding '%s'", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING]      = { "incompatible character encoding: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_NON_ESCAPED_MBC]             = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_INVALID_UNICODE_RANGE]       = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_PARSE_ERROR]                 = { "%s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_UNKNOWN_OPTIONS]             = { "unknown regexp %s - %.*s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_TERM]                        = { "unterminated regexp meets end of file; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP]   = { "UTF-8 character in non UTF-8 regexp: /%s/", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_RESCUE_EXPRESSION]                  = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_RESCUE_MODIFIER_VALUE]              = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_RESCUE_TERM]                        = { "expected a closing delimiter for the `rescue` clause", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_RESCUE_VARIABLE]                    = { "expected an exception variable after `=>` in a rescue statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_RETURN_INVALID]                     = { "Invalid return in class/module body", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_SINGLETON_FOR_LITERALS]             = { "cannot define singleton method for literals", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STATEMENT_ALIAS]                    = { "unexpected an `alias` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STATEMENT_POSTEXE_END]              = { "unexpected an `END` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STATEMENT_PREEXE_BEGIN]             = { "unexpected a `BEGIN` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STATEMENT_UNDEF]                    = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STRING_CONCATENATION]               = { "expected a string for concatenation", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STRING_INTERPOLATED_TERM]           = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STRING_LITERAL_EOF]                 = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_STRING_LITERAL_TERM]                = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_SYMBOL_INVALID]                     = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719
+    [PM_ERR_SYMBOL_TERM_DYNAMIC]                = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_SYMBOL_TERM_INTERPOLATED]           = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_TERNARY_COLON]                      = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_TERNARY_EXPRESSION_FALSE]           = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_TERNARY_EXPRESSION_TRUE]            = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNARY_RECEIVER]                     = { "unexpected %s, expected a receiver for unary `%c`", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNARY_DISALLOWED]                   = { "unexpected %s; unary calls are not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNDEF_ARGUMENT]                     = { "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_BLOCK_ARGUMENT]          = { "block argument should not be given", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_INDEX_BLOCK]             = { "unexpected block arg given in index assignment; blocks are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_INDEX_KEYWORDS]          = { "unexpected keyword arg given in index assignment; keywords are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_LABEL]                   = { "unexpected label", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_MULTI_WRITE]             = { "unexpected multiple assignment; multiple assignment is not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE] = { "unexpected %s; expected a default value for a parameter", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_RANGE_OPERATOR]          = { "unexpected range operator; .. and ... are non-associative and cannot be chained", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_SAFE_NAVIGATION]         = { "&. inside multiple assignment destination", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT]     = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNEXPECTED_TOKEN_IGNORE]            = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_UNTIL_TERM]                         = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_VOID_EXPRESSION]                    = { "unexpected void value expression", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_WHILE_TERM]                         = { "expected an `end` to close the `while` statement", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_WRITE_TARGET_IN_METHOD]             = { "dynamic constant assignment", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_WRITE_TARGET_READONLY]              = { "Can't set variable %.*s", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_WRITE_TARGET_UNEXPECTED]            = { "unexpected write target", PM_ERROR_LEVEL_SYNTAX },
+    [PM_ERR_XSTRING_TERM]                       = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_SYNTAX },
+
+    // Warnings
+    [PM_WARN_AMBIGUOUS_BINARY_OPERATOR]         = { "'%s' after local variable or literal is interpreted as binary operator even though it seems like %s", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS]    = { "ambiguous first argument; put parentheses or a space even after `-` operator", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS]     = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND]        = { "ambiguous `&` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_AMBIGUOUS_PREFIX_STAR]             = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR]        = { "ambiguous `**` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_AMBIGUOUS_SLASH]                   = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_COMPARISON_AFTER_COMPARISON]       = { "comparison '%.*s' after comparison", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_DOT_DOT_DOT_EOL]                   = { "... at EOL, should be parenthesized?", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_DUPLICATED_HASH_KEY]               = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_DUPLICATED_WHEN_CLAUSE]            = { "'when' clause on line %" PRIi32 " duplicates 'when' clause on line %" PRIi32 " and is ignored", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_EQUAL_IN_CONDITIONAL_3_3]          = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_EQUAL_IN_CONDITIONAL]              = { "found '= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_END_IN_METHOD]                     = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_FLOAT_OUT_OF_RANGE]                = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_IGNORED_FROZEN_STRING_LITERAL]     = { "'frozen_string_literal' is ignored after any tokens", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_INDENTATION_MISMATCH]              = { "mismatched indentations at '%.*s' with '%.*s' at %" PRIi32, PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_INTEGER_IN_FLIP_FLOP]              = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_INVALID_CHARACTER]                 = { "invalid character syntax; use %s%s%s", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_INVALID_MAGIC_COMMENT_VALUE]       = { "invalid value for %.*s: %.*s", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_INVALID_NUMBERED_REFERENCE]        = { "'%.*s' is too big for a number variable, always nil", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_KEYWORD_EOL]                       = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_LITERAL_IN_CONDITION_DEFAULT]      = { "%sliteral in %s", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_LITERAL_IN_CONDITION_VERBOSE]      = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE]     = { "'shareable_constant_value' is ignored unless in comment-only line", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_SHEBANG_CARRIAGE_RETURN]           = { "shebang line ending with \\r may cause problems", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_UNEXPECTED_CARRIAGE_RETURN]        = { "encountered \\r in middle of line, treated as a mere space", PM_WARNING_LEVEL_DEFAULT },
+    [PM_WARN_UNREACHABLE_STATEMENT]             = { "statement not reached", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_UNUSED_LOCAL_VARIABLE]             = { "assigned but unused variable - %.*s", PM_WARNING_LEVEL_VERBOSE },
+    [PM_WARN_VOID_STATEMENT]                    = { "possibly useless use of %.*s in void context", PM_WARNING_LEVEL_VERBOSE }
+};
+
+/**
+ * Get the human-readable name of the given diagnostic ID: its lowercased template name.
+ */
+const char *
+pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) {
+    switch (diag_id) {
+        <%- errors.each do |error| -%>
+        case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>";
+        <%- end -%>
+        <%- warnings.each do |warning| -%>
+        case PM_WARN_<%= warning.name %>: return "<%= warning.name.downcase %>";
+        <%- end -%>
+    }
+
+    assert(false && "unreachable"); // every generated case returns from the switch above
+    return ""; // fallback when assertions are compiled out
+}
+
+static inline const char *
+pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
+    assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
+    // The ID is used directly as an index into the diagnostic_messages table.
+    const char *message = diagnostic_messages[diag_id].message;
+    assert(message); // a NULL here means the table has no message for this ID
+
+    return message;
+}
+
+static inline uint8_t
+pm_diagnostic_level(pm_diagnostic_id_t diag_id) {
+    assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
+    // The level lives in the same table entry as the message; narrow it to a byte.
+    return (uint8_t) diagnostic_messages[diag_id].level;
+}
+
+/**
+ * Append a diagnostic whose message is the unformatted table text for diag_id to the given list.
+ */
+bool
+pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
+    if (diagnostic == NULL) return false; // allocation failed; nothing was appended
+
+    *diagnostic = (pm_diagnostic_t) {
+        .location = { start, end },
+        .diag_id = diag_id,
+        .message = pm_diagnostic_message(diag_id), // table entry used as-is, not formatted
+        .owned = false, // pm_diagnostic_list_free only frees messages marked as owned
+        .level = pm_diagnostic_level(diag_id)
+    };
+
+    pm_list_append(list, (pm_list_node_t *) diagnostic);
+    return true;
+}
+
+/**
+ * Append a diagnostic whose message is built from a format string to the given
+ * list. Returns false, appending nothing, if formatting or allocation fails.
+ */
+bool
+pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) {
+    const char *format = pm_diagnostic_message(diag_id);
+    va_list arguments;
+    // Pass one measures the message; vsnprintf writes nothing to a NULL buffer.
+    va_start(arguments, diag_id);
+    int result = vsnprintf(NULL, 0, format, arguments);
+    va_end(arguments);
+    if (result < 0) return false;
+
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
+    if (diagnostic == NULL) return false;
+
+    size_t length = ((size_t) result) + 1; // widen first: result + 1 could overflow an int
+    char *message = (char *) xmalloc(length);
+    if (message == NULL) {
+        xfree(diagnostic);
+        return false;
+    }
+
+    // Pass two: the argument list was consumed above, so restart it to format.
+    va_start(arguments, diag_id);
+    result = vsnprintf(message, length, format, arguments);
+    va_end(arguments);
+    if (result < 0) { // formatting failed; never publish an unwritten buffer
+        xfree(message);
+        xfree(diagnostic);
+        return false;
+    }
+
+    *diagnostic = (pm_diagnostic_t) {
+        .location = { start, end },
+        .diag_id = diag_id,
+        .message = message,
+        .owned = true, // pm_diagnostic_list_free releases this buffer
+        .level = pm_diagnostic_level(diag_id)
+    };
+    pm_list_append(list, (pm_list_node_t *) diagnostic);
+    return true;
+}
+
+/**
+ * Release every diagnostic in the given list, along with any message buffer
+ * that a diagnostic owns. The list structure itself is not modified.
+ */
+void
+pm_diagnostic_list_free(pm_list_t *list) {
+    pm_diagnostic_t *next;
+    for (pm_diagnostic_t *current = (pm_diagnostic_t *) list->head; current != NULL; current = next) {
+        // Grab the successor first; current is about to be released.
+        next = (pm_diagnostic_t *) current->node.next;
+        if (current->owned) {
+            xfree((void *) current->message);
+        }
+        xfree(current);
+    }
+}
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
new file mode 100644
index 0000000000..2357e55200
--- /dev/null
+++ b/prism/templates/src/node.c.erb
@@ -0,0 +1,333 @@
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+#include "prism/node.h"
+
+/**
+ * Ensures the node list has room for `size` additional nodes. If the existing
+ * capacity already suffices, this function does nothing. Otherwise it doubles
+ * the capacity until the request fits and reallocates. Returns false if the
+ * arithmetic overflows or the reallocation fails, otherwise returns true.
+ */
+static bool
+pm_node_list_grow(pm_node_list_t *list, size_t size) {
+    size_t requested_size = list->size + size;
+
+    // If the requested size caused overflow, return false.
+    if (requested_size < list->size) return false;
+
+    // A request that exactly fills the existing capacity still fits.
+    if (requested_size <= list->capacity) return true;
+
+    // Otherwise, start from twice the current capacity (or 4 when empty).
+    size_t next_capacity = list->capacity == 0 ? 4 : list->capacity * 2;
+
+    // If multiplying by 2 caused overflow, return false.
+    if (next_capacity < list->capacity) return false;
+
+    // If we didn't get enough by doubling, keep doubling until we do.
+    while (requested_size > next_capacity) {
+        size_t double_capacity = next_capacity * 2;
+        if (double_capacity < next_capacity) return false; // overflowed
+        next_capacity = double_capacity;
+    }
+
+    // The byte count handed to xrealloc must not overflow either.
+    if (next_capacity > SIZE_MAX / sizeof(pm_node_t *)) return false;
+
+    pm_node_t **nodes = (pm_node_t **) xrealloc(list->nodes, sizeof(pm_node_t *) * next_capacity);
+    if (nodes == NULL) return false;
+    list->nodes = nodes;
+    list->capacity = next_capacity;
+    return true;
+}
+
+/**
+ * Push the given node onto the tail of the node list. The node is not stored
+ * if the list cannot be grown to hold it.
+ */
+void
+pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
+    if (!pm_node_list_grow(list, 1)) return;
+    list->nodes[list->size++] = node;
+}
+
+/**
+ * Insert the given node at the front of the node list, shifting the existing
+ * entries up by one slot. The node is not stored if the list cannot be grown.
+ */
+void
+pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node) {
+    if (!pm_node_list_grow(list, 1)) return;
+    memmove(&list->nodes[1], &list->nodes[0], sizeof(pm_node_t *) * list->size);
+    list->nodes[0] = node;
+    list->size += 1;
+}
+
+/**
+ * Copy every entry of the other node list onto the tail of this node list.
+ * Nothing happens if the other list is empty or this list cannot be grown.
+ */
+void
+pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other) {
+    if (other->size == 0 || !pm_node_list_grow(list, other->size)) return;
+    memcpy(&list->nodes[list->size], other->nodes, sizeof(pm_node_t *) * other->size);
+    list->size += other->size;
+}
+
+/**
+ * Release the backing array of the given node list and reset the list to its
+ * zeroed state. A list with no capacity is left untouched.
+ */
+void
+pm_node_list_free(pm_node_list_t *list) {
+    if (list->capacity == 0) return;
+    xfree(list->nodes);
+    *list = (pm_node_list_t) { 0 };
+}
+
+PRISM_EXPORTED_FUNCTION void
+pm_node_destroy(pm_parser_t *parser, pm_node_t *node); // declared early: pm_node_list_destroy below calls it
+
+/**
+ * Destroy every node contained within the given node list, then free the list's own storage.
+ */
+static void
+pm_node_list_destroy(pm_parser_t *parser, pm_node_list_t *list) {
+    pm_node_t *node;
+    PM_NODE_LIST_FOREACH(list, index, node) pm_node_destroy(parser, node);
+    pm_node_list_free(list); // the entries are gone; now release the array that held them
+}
+
+/**
+ * Deallocate a pm_node_t and the fields it owns (child nodes, node lists,
+ * strings, constant lists, integers). As with pm_node_alloc, the parser is not
+ * used beyond being passed along; it allows for future pre-allocated pools.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        <%- nodes.each do |node| -%>
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+        case <%= node.type %>: {
+            <%- if node.fields.any? { |field| ![Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField].include?(field.class) } -%>
+            pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+            <%- end -%>
+            <%- node.fields.each do |field| -%>
+            <%- case field -%>
+            <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField -%>
+            <%- when Prism::Template::NodeField -%>
+            pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalNodeField -%>
+            if (cast-><%= field.name %> != NULL) {
+                pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
+            }
+            <%- when Prism::Template::StringField -%>
+            pm_string_free(&cast-><%= field.name %>);
+            <%- when Prism::Template::NodeListField -%>
+            pm_node_list_destroy(parser, &cast-><%= field.name %>);
+            <%- when Prism::Template::ConstantListField -%>
+            pm_constant_id_list_free(&cast-><%= field.name %>);
+            <%- when Prism::Template::IntegerField -%>
+            pm_integer_free(&cast-><%= field.name %>);
+            <%- else -%>
+            <%- raise -%>
+            <%- end -%>
+            <%- end -%>
+            break;
+        }
+        <%- end -%>
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+        default:
+            assert(false && "unreachable");
+            break;
+    }
+    xfree(node); // the owned fields were released above; now release the node itself
+}
+
+/**
+ * Returns the name of the given node type's constant as a string, or "" if no case matches.
+ */
+PRISM_EXPORTED_FUNCTION const char *
+pm_node_type_to_str(pm_node_type_t node_type)
+{
+    switch (node_type) {
+<%- nodes.each do |node| -%>
+        case <%= node.type %>:
+            return "<%= node.type %>";
+<%- end -%>
+    }
+    return ""; // no generated case matched the given type
+}
+
+/**
+ * Walk the subtree rooted at the given node. The visitor callback is invoked
+ * on the node first; the node's children are walked (through
+ * pm_visit_child_nodes) only when that call returns true, and are skipped
+ * when it returns false. The data parameter is never inspected here: it is an
+ * opaque pointer handed unchanged to the visitor for consumers to use.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
+    if (!visitor(node, data)) return;
+    pm_visit_child_nodes(node, visitor, data);
+}
+
+/**
+ * Visit the child nodes of the given node (its node, optional node, and node
+ * list fields, in template order) by calling pm_visit_node on each. This is
+ * what pm_visit_node calls for a node when the callback returns true.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
+    switch (PM_NODE_TYPE(node)) {
+        <%- nodes.each do |node| -%>
+        <%- if (fields = node.fields.select { |field| field.is_a?(Prism::Template::NodeField) || field.is_a?(Prism::Template::OptionalNodeField) || field.is_a?(Prism::Template::NodeListField) }).any? -%>
+        case <%= node.type %>: {
+            const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+            <%- fields.each do |field| -%>
+
+            // Visit the <%= field.name %> field
+            <%- case field -%>
+            <%- when Prism::Template::NodeField -%>
+            pm_visit_node((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+            <%- when Prism::Template::OptionalNodeField -%>
+            if (cast-><%= field.name %> != NULL) {
+                pm_visit_node((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+            }
+            <%- when Prism::Template::NodeListField -%>
+            const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+            for (size_t index = 0; index < <%= field.name %>->size; index++) {
+                pm_visit_node(<%= field.name %>->nodes[index], visitor, data);
+            }
+            <%- end -%>
+            <%- end -%>
+
+            break;
+        }
+        <%- else -%>
+        case <%= node.type %>:
+            break;
+        <%- end -%>
+        <%- end -%>
+        case PM_SCOPE_NODE: // handled outside the generated cases; nothing is visited
+            break;
+    }
+}
+
+// We optionally support dumping to JSON. For systems that don't want or need
+// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+#ifndef PRISM_EXCLUDE_JSON
+
+static void
+pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
+    const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); // look the ID up in the parser's constant pool
+    pm_buffer_append_byte(buffer, '"'); // opening quote of the JSON string
+    pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON); // the constant's bytes, appended in JSON escaping mode
+    pm_buffer_append_byte(buffer, '"'); // closing quote
+}
+
+static void
+pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) {
+    uint32_t start = (uint32_t) (location->start - parser->start); // offset of the location's start relative to parser->start
+    uint32_t end = (uint32_t) (location->end - parser->start); // offset of the location's end relative to parser->start
+    pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start, end); // emitted as a JSON object with "start" and "end" keys
+}
+
+/**
+ * Dump the given node, and recursively its fields and child nodes, as JSON to the given buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
+        <%- nodes.each do |node| -%>
+        case <%= node.type %>: {
+            pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
+
+            const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+            pm_dump_json_location(buffer, parser, &cast->base.location);
+            <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
+
+            // Dump the <%= field.name %> field
+            pm_buffer_append_byte(buffer, ',');
+            pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
+            <%- case field -%>
+            <%- when Prism::Template::NodeField -%>
+            pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalNodeField -%>
+            if (cast-><%= field.name %> != NULL) {
+                pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+            } else {
+                pm_buffer_append_string(buffer, "null", 4);
+            }
+            <%- when Prism::Template::NodeListField -%>
+            const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+            pm_buffer_append_byte(buffer, '[');
+
+            for (size_t index = 0; index < <%= field.name %>->size; index++) {
+                if (index != 0) pm_buffer_append_byte(buffer, ',');
+                pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
+            }
+            pm_buffer_append_byte(buffer, ']');
+            <%- when Prism::Template::StringField -%>
+            const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
+            pm_buffer_append_byte(buffer, '"');
+            pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
+            pm_buffer_append_byte(buffer, '"');
+            <%- when Prism::Template::ConstantField -%>
+            pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalConstantField -%>
+            if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
+                pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+            } else {
+                pm_buffer_append_string(buffer, "null", 4);
+            }
+            <%- when Prism::Template::ConstantListField -%>
+            const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
+            pm_buffer_append_byte(buffer, '[');
+
+            for (size_t index = 0; index < <%= field.name %>->size; index++) {
+                if (index != 0) pm_buffer_append_byte(buffer, ',');
+                pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
+            }
+            pm_buffer_append_byte(buffer, ']');
+            <%- when Prism::Template::LocationField -%>
+            pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
+            <%- when Prism::Template::OptionalLocationField -%>
+            if (cast-><%= field.name %>.start != NULL) {
+                pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
+            } else {
+                pm_buffer_append_string(buffer, "null", 4);
+            }
+            <%- when Prism::Template::UInt8Field -%>
+            pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
+            <%- when Prism::Template::UInt32Field -%>
+            pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
+            <%- when Prism::Template::Flags -%>
+            size_t flags = 0;
+            pm_buffer_append_byte(buffer, '[');
+            <%- node.flags.values.each_with_index do |value, index| -%>
+            if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
+                if (flags != 0) pm_buffer_append_byte(buffer, ',');
+                pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
+                flags++;
+            }
+            <%- end -%>
+            pm_buffer_append_byte(buffer, ']');
+            <%- when Prism::Template::IntegerField -%>
+            pm_integer_string(buffer, &cast-><%= field.name %>);
+            <%- when Prism::Template::DoubleField -%>
+            pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
+            <%- else -%>
+            <%- raise %>
+            <%- end -%>
+            <%- end -%>
+
+            pm_buffer_append_byte(buffer, '}');
+            break;
+        }
+        <%- end -%>
+        case PM_SCOPE_NODE: // handled outside the generated cases; nothing is written
+            break;
+    }
+}
+
+#endif
diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb
new file mode 100644
index 0000000000..639c2fecf3
--- /dev/null
+++ b/prism/templates/src/prettyprint.c.erb
@@ -0,0 +1,166 @@
+<%# encoding: ASCII -%>
+#include "prism/prettyprint.h"
+
+// We optionally support pretty printing nodes. For systems that don't want or
+// need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_PRETTYPRINT define.
+#ifdef PRISM_EXCLUDE_PRETTYPRINT
+
+void pm_prettyprint(void) {}
+
+#else
+
+static inline void
+prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) { // Appends "(line,column)-(line,column)" for the two ends of location.
+    const pm_line_column_t from = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
+    const pm_line_column_t to = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line);
+    pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", from.line, from.column, to.line, to.column);
+}
+
+static inline void
+prettyprint_constant(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_constant_id_t constant_id) { // Appends ':' followed by the bytes of the constant looked up by constant_id.
+    const pm_constant_t *entry = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+    pm_buffer_append_format(output_buffer, ":%.*s", (int) entry->length, entry->start);
+}
+
+static void
+prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node, pm_buffer_t *prefix_buffer) { // Prints node and, recursively, its fields to output_buffer; prefix_buffer carries the indentation for nested lines.
+    switch (PM_NODE_TYPE(node)) {
+        case PM_SCOPE_NODE:
+            // We do not need to print a ScopeNode as it's not part of the AST.
+            return;
+        <%- nodes.each do |node| -%>
+        case <%= node.type %>: {
+            <%- if !node.flags.nil? || node.fields.any? -%>
+            pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+            <%- end -%>
+            pm_buffer_append_string(output_buffer, "@ <%= node.name %> (location: ", <%= node.name.length + 14 %>); // 14 = length of "@ " plus length of " (location: "
+            prettyprint_location(output_buffer, parser, &node->location);
+            pm_buffer_append_string(output_buffer, ")\n", 2);
+            <%- (fields = [*node.flags, *node.fields]).each_with_index do |field, index| -%>
+            <%- preadd = index == fields.length - 1 ? "    " : "|   " -%>
+
+            // <%= field.name %>
+            {
+                pm_buffer_concat(output_buffer, prefix_buffer); // start the field's line with the current indentation
+                pm_buffer_append_string(output_buffer, "+-- <%= field.name %>:", <%= 4 + field.name.length + 1 %>); // 4 = length of "+-- ", 1 = the trailing ':'
+            <%- case field -%>
+            <%- when Prism::Template::NodeField -%>
+                pm_buffer_append_byte(output_buffer, '\n');
+
+                size_t prefix_length = prefix_buffer->length; // saved so the prefix can be truncated back after the child is printed
+                pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4); // four spaces for the node's last field, "|   " otherwise
+                pm_buffer_concat(output_buffer, prefix_buffer);
+                prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
+                prefix_buffer->length = prefix_length; // restore the prefix this call started with
+            <%- when Prism::Template::OptionalNodeField -%>
+                if (cast-><%= field.name %> == NULL) {
+                    pm_buffer_append_string(output_buffer, " nil\n", 5);
+                } else {
+                    pm_buffer_append_byte(output_buffer, '\n');
+
+                    size_t prefix_length = prefix_buffer->length;
+                    pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
+                    pm_buffer_concat(output_buffer, prefix_buffer);
+                    prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
+                    prefix_buffer->length = prefix_length;
+                }
+            <%- when Prism::Template::StringField -%>
+                pm_buffer_append_string(output_buffer, " \"", 2);
+                pm_buffer_append_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>), PM_BUFFER_ESCAPING_RUBY);
+                pm_buffer_append_string(output_buffer, "\"\n", 2);
+            <%- when Prism::Template::NodeListField -%>
+                pm_buffer_append_format(output_buffer, " (length: %lu)\n", (unsigned long) (cast-><%= field.name %>.size));
+
+                size_t last_index = cast-><%= field.name %>.size; // element count; the final element is at last_index - 1
+                for (uint32_t index = 0; index < last_index; index++) {
+                    size_t prefix_length = prefix_buffer->length;
+                    pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
+                    pm_buffer_concat(output_buffer, prefix_buffer);
+                    pm_buffer_append_string(output_buffer, "+-- ", 4);
+                    pm_buffer_append_string(prefix_buffer, (index == last_index - 1) ? "    " : "|   ", 4); // lines under the final element get spaces, lines under earlier ones get a '|'
+                    prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>.nodes[index], prefix_buffer);
+                    prefix_buffer->length = prefix_length; // drops both prefixes appended in this iteration
+                }
+            <%- when Prism::Template::ConstantField -%>
+                pm_buffer_append_byte(output_buffer, ' ');
+                prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
+                pm_buffer_append_byte(output_buffer, '\n');
+            <%- when Prism::Template::OptionalConstantField -%>
+                if (cast-><%= field.name %> == 0) { // an id of 0 is printed as nil
+                    pm_buffer_append_string(output_buffer, " nil\n", 5);
+                } else {
+                    pm_buffer_append_byte(output_buffer, ' ');
+                    prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
+                    pm_buffer_append_byte(output_buffer, '\n');
+                }
+            <%- when Prism::Template::ConstantListField -%>
+                pm_buffer_append_string(output_buffer, " [", 2);
+                for (uint32_t index = 0; index < cast-><%= field.name %>.size; index++) {
+                    if (index != 0) pm_buffer_append_string(output_buffer, ", ", 2);
+                    prettyprint_constant(output_buffer, parser, cast-><%= field.name %>.ids[index]);
+                }
+                pm_buffer_append_string(output_buffer, "]\n", 2);
+            <%- when Prism::Template::LocationField -%>
+                pm_location_t *location = &cast-><%= field.name %>;
+                pm_buffer_append_byte(output_buffer, ' ');
+                prettyprint_location(output_buffer, parser, location);
+                pm_buffer_append_string(output_buffer, " = \"", 4);
+                pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY); // the bytes between start and end, escaped
+                pm_buffer_append_string(output_buffer, "\"\n", 2);
+            <%- when Prism::Template::OptionalLocationField -%>
+                pm_location_t *location = &cast-><%= field.name %>;
+                if (location->start == NULL) { // a NULL start is printed as nil
+                    pm_buffer_append_string(output_buffer, " nil\n", 5);
+                } else {
+                    pm_buffer_append_byte(output_buffer, ' ');
+                    prettyprint_location(output_buffer, parser, location);
+                    pm_buffer_append_string(output_buffer, " = \"", 4);
+                    pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
+                    pm_buffer_append_string(output_buffer, "\"\n", 2);
+                }
+            <%- when Prism::Template::UInt8Field -%>
+                pm_buffer_append_format(output_buffer, " %" PRIu8 "\n", cast-><%= field.name %>);
+            <%- when Prism::Template::UInt32Field -%>
+                pm_buffer_append_format(output_buffer, " %" PRIu32 "\n", cast-><%= field.name %>);
+            <%- when Prism::Template::Flags -%>
+                bool found = false; // becomes true once any flag name has been printed
+                <%- field.values.each do |value| -%>
+                if (cast->base.flags & PM_<%= field.human.upcase %>_<%= value.name %>) {
+                    if (found) pm_buffer_append_byte(output_buffer, ',');
+                    pm_buffer_append_string(output_buffer, " <%= value.name.downcase %>", <%= value.name.bytesize + 1 %>); // + 1 for the leading space
+                    found = true;
+                }
+                <%- end -%>
+                if (!found) pm_buffer_append_string(output_buffer, " nil", 4); // none of the flag bits were set
+                pm_buffer_append_byte(output_buffer, '\n');
+            <%- when Prism::Template::IntegerField -%>
+                const pm_integer_t *integer = &cast-><%= field.name %>;
+                pm_buffer_append_byte(output_buffer, ' ');
+                pm_integer_string(output_buffer, integer);
+                pm_buffer_append_byte(output_buffer, '\n');
+            <%- when Prism::Template::DoubleField -%>
+                pm_buffer_append_format(output_buffer, " %f\n", cast-><%= field.name %>);
+            <%- else -%>
+            <%- raise -%>
+            <%- end -%>
+            }
+            <%- end -%>
+
+            break;
+        }
+        <%- end -%>
+    }
+}
+
+/**
+ * Writes a tree-shaped dump of node, and of everything below it, to output_buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) {
+    pm_buffer_t indent = { 0 }; // scratch buffer for the per-line prefix; starts out empty
+    prettyprint_node(output_buffer, parser, node, &indent);
+    pm_buffer_free(&indent);
+}
+
+#endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
new file mode 100644
index 0000000000..0f0aace445
--- /dev/null
+++ b/prism/templates/src/serialize.c.erb
@@ -0,0 +1,406 @@
+#include "prism.h"
+
+// We optionally support serializing to a binary string. For systems that don't
+// want or need this functionality, it can be turned off with the
+// PRISM_EXCLUDE_SERIALIZATION define.
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+#include <stdio.h>
+
+static inline uint32_t
+pm_ptrdifft_to_u32(ptrdiff_t value) { // Narrows a pointer difference to 32 bits; the assert requires it to be non-negative and below UINT32_MAX.
+    assert(value >= 0 && ((uintmax_t) value) < UINT32_MAX); // uintmax_t, not unsigned long: the latter is 32 bits on LLP64 targets and would truncate value before the comparison
+    return (uint32_t) value;
+}
+
+static inline uint32_t
+pm_sizet_to_u32(size_t value) { // Narrows a size to 32 bits; the assert requires it to be below UINT32_MAX.
+    assert(UINT32_MAX > value);
+    return (uint32_t) value;
+}
+
+static void
+pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) {
+    assert(location->start != NULL);
+    assert(location->end != NULL);
+    assert(location->end >= location->start);
+    // A location is written as two varuints: its offset from parser->start, then its length.
+    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->start - parser->start));
+    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->end - location->start));
+}
+
+static void
+pm_serialize_string(const pm_parser_t *parser, const pm_string_t *string, pm_buffer_t *buffer) {
+    switch (string->type) {
+        case PM_STRING_SHARED:
+            // Tag 1: only an offset from parser->start and a length are written, not the bytes.
+            pm_buffer_append_byte(buffer, 1);
+            pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(pm_string_source(string) - parser->start));
+            pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_string_length(string)));
+            break;
+        case PM_STRING_OWNED:
+        case PM_STRING_CONSTANT: {
+            const uint32_t size = pm_sizet_to_u32(pm_string_length(string));
+            pm_buffer_append_byte(buffer, 2); // tag 2: the length, then a copy of the bytes themselves
+            pm_buffer_append_varuint(buffer, size);
+            pm_buffer_append_bytes(buffer, pm_string_source(string), size);
+            break;
+        }
+#ifdef PRISM_HAS_MMAP
+        case PM_STRING_MAPPED:
+            assert(false && "Cannot serialize mapped strings.");
+            break;
+#endif
+    }
+}
+
+static void
+pm_serialize_integer(const pm_integer_t *integer, pm_buffer_t *buffer) { // Layout: sign byte (1 if negative), word count, then each word as a varuint.
+    pm_buffer_append_byte(buffer, integer->negative ? 1 : 0);
+    if (integer->values != NULL) {
+        pm_buffer_append_varuint(buffer, pm_sizet_to_u32(integer->length));
+        for (size_t word = 0; word < integer->length; word++) {
+            pm_buffer_append_varuint(buffer, integer->values[word]);
+        }
+    } else {
+        pm_buffer_append_varuint(buffer, pm_sizet_to_u32(1)); // no values array: exactly one word, taken from integer->value
+        pm_buffer_append_varuint(buffer, integer->value);
+    }
+}
+
+static void
+pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { // Writes one node: its type byte, optional node id, location, then the fields specific to its type (recursing into child nodes).
+    pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
+
+    size_t offset = buffer->length; // position right after the type byte; the serialized length below is measured from here
+
+    <%- if Prism::Template::INCLUDE_NODE_ID -%>
+    pm_buffer_append_varuint(buffer, node->node_id);
+    <%- end -%>
+    pm_serialize_location(parser, &node->location, buffer);
+
+    switch (PM_NODE_TYPE(node)) {
+        // We do not need to serialize a ScopeNode ever as
+        // it is not part of the AST
+        case PM_SCOPE_NODE:
+            return;
+        <%- nodes.each do |node| -%>
+        case <%= node.type %>: {
+            <%- if node.needs_serialized_length? -%>
+            // Reserve a 4-byte slot for the serialized length. The data written
+            // before it is varuint-encoded, so the slot's position must be saved.
+            size_t length_offset = buffer->length;
+            pm_buffer_append_string(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */
+            <%- end -%>
+            <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS && !node.flags -%>
+            pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
+            <%- end -%>
+            <%- node.fields.each do |field| -%>
+            <%- case field -%>
+            <%- when Prism::Template::NodeField -%>
+            pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            <%- when Prism::Template::OptionalNodeField -%>
+            if (((pm_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
+                pm_buffer_append_byte(buffer, 0); // a single 0 byte stands in for a missing child
+            } else {
+                pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            }
+            <%- when Prism::Template::StringField -%>
+            pm_serialize_string(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            <%- when Prism::Template::NodeListField -%>
+            uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
+            pm_buffer_append_varuint(buffer, <%= field.name %>_size); // element count, then the elements
+            for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
+                pm_serialize_node(parser, (pm_node_t *) ((pm_<%= node.human %>_t *)node)-><%= field.name %>.nodes[index], buffer);
+            }
+            <%- when Prism::Template::ConstantField, Prism::Template::OptionalConstantField -%>
+            pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>));
+            <%- when Prism::Template::ConstantListField -%>
+            uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
+            pm_buffer_append_varuint(buffer, <%= field.name %>_size); // id count, then the ids
+            for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
+                pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
+            }
+            <%- when Prism::Template::LocationField -%>
+            <%- if field.should_be_serialized? -%>
+            pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            <%- end -%>
+            <%- when Prism::Template::OptionalLocationField -%>
+            <%- if field.should_be_serialized? -%>
+            if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
+                pm_buffer_append_byte(buffer, 0); // absent: a lone 0 byte
+            } else {
+                pm_buffer_append_byte(buffer, 1); // present: a 1 byte, then the location
+                pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            }
+            <%- end -%>
+            <%- when Prism::Template::UInt8Field -%>
+            pm_buffer_append_byte(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
+            <%- when Prism::Template::UInt32Field -%>
+            pm_buffer_append_varuint(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
+            <%- when Prism::Template::IntegerField -%>
+            pm_serialize_integer(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+            <%- when Prism::Template::DoubleField -%>
+            pm_buffer_append_double(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
+            <%- else -%>
+            <%- raise -%>
+            <%- end -%>
+            <%- end -%>
+            <%- if node.needs_serialized_length? -%>
+            // Fill in the reserved slot: bytes written since offset, minus the 4-byte slot itself.
+            uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
+            memcpy(buffer->value + length_offset, &length, sizeof(uint32_t)); // copied as raw bytes, i.e. in host byte order
+            <%- end -%>
+            break;
+        }
+        <%- end -%>
+    }
+}
+
+static void
+pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
+    // The entry count goes first, then every stored offset narrowed to 32 bits.
+    const uint32_t count = pm_sizet_to_u32(list->size);
+    pm_buffer_append_varuint(buffer, count);
+    for (uint32_t line = 0; line < count; line++) {
+        const uint32_t newline_offset = pm_sizet_to_u32(list->offsets[line]);
+        pm_buffer_append_varuint(buffer, newline_offset);
+    }
+}
+
+static void
+pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
+    // A comment is written as a one-byte type tag...
+    const uint8_t type_tag = (uint8_t) comment->type;
+    pm_buffer_append_byte(buffer, type_tag);
+    // ...followed by its location.
+    pm_serialize_location(parser, &comment->location, buffer);
+}
+
+/**
+ * Writes the number of comments in list, then each comment in list order.
+ */
+void
+pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
+    pm_comment_t *current = (pm_comment_t *) list->head;
+    while (current != NULL) {
+        pm_serialize_comment(parser, current, buffer);
+        current = (pm_comment_t *) current->node.next;
+    }
+}
+
+static void
+pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
+    // Four varuints in total: (offset, length) of the key, then (offset, length)
+    // of the value. Both offsets are measured from parser->start.
+    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->key_length));
+    // The value follows the key in the same shape.
+    pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->value_length));
+}
+
+static void
+pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+    // The element count precedes the elements themselves.
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
+    // Walk the list from its head, following node.next until it is NULL.
+    for (pm_magic_comment_t *entry = (pm_magic_comment_t *) list->head; entry != NULL; entry = (pm_magic_comment_t *) entry->node.next) {
+        pm_serialize_magic_comment(parser, entry, buffer);
+    }
+}
+
+static void
+pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
+    if (parser->data_loc.end == NULL) {
+        pm_buffer_append_byte(buffer, 0); // data_loc.end is NULL: a lone 0 byte
+        return;
+    }
+    pm_buffer_append_byte(buffer, 1); // otherwise a 1 byte, then the location
+    pm_serialize_location(parser, &parser->data_loc, buffer);
+}
+
+static void
+pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
+    // Field order: diagnostic id, length-prefixed message, location, level.
+    pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id);
+
+    // strlen excludes the terminating NUL, so the terminator is not written.
+    const char *text = diagnostic->message;
+    const size_t text_size = strlen(text);
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(text_size));
+    pm_buffer_append_string(buffer, text, text_size);
+
+    pm_serialize_location(parser, &diagnostic->location, buffer);
+
+    pm_buffer_append_byte(buffer, diagnostic->level); // the level goes last, as one byte
+}
+
+static void
+pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list))); // the element count comes first
+    pm_diagnostic_t *current = (pm_diagnostic_t *) list->head;
+    while (current != NULL) {
+        pm_serialize_diagnostic(parser, current, buffer);
+        current = (pm_diagnostic_t *) current->node.next;
+    }
+}
+
+/**
+ * Writes the encoding's name to the buffer: a varuint length, then the bytes.
+ */
+void
+pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
+    const size_t name_size = strlen(encoding->name);
+    pm_buffer_append_varuint(buffer, pm_sizet_to_u32(name_size));
+    pm_buffer_append_string(buffer, encoding->name, name_size);
+}
+
+static void
+pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { // Writes the parser-level metadata; the order of the calls below is the order of the fields in the output.
+    pm_serialize_encoding(parser->encoding, buffer);
+    pm_buffer_append_varsint(buffer, parser->start_line); // appended with the varsint helper rather than varuint
+    pm_serialize_newline_list(&parser->newline_list, buffer);
+<%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
+    pm_serialize_comment_list(parser, &parser->comment_list, buffer); // left out of the generated code when SERIALIZE_ONLY_SEMANTICS_FIELDS is set
+<%- end -%>
+    pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
+    pm_serialize_data_loc(parser, buffer);
+    pm_serialize_diagnostic_list(parser, &parser->error_list, buffer); // errors first, then warnings
+    pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
+}
+
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+/**
+ * Serialize the metadata, then the node tree, then the constant pool.
+ */
+void
+pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
+    pm_serialize_metadata(parser, buffer);
+
+    // Here we're going to leave space for the offset of the constant pool in
+    // the buffer.
+    size_t offset = buffer->length;
+    pm_buffer_append_zeroes(buffer, 4); // placeholder; overwritten once the node has been serialized
+
+    // Next, encode the length of the constant pool.
+    pm_buffer_append_varuint(buffer, parser->constant_pool.size);
+
+    // Now we're going to serialize the content of the node.
+    pm_serialize_node(parser, node, buffer);
+
+    // Now we're going to serialize the offset of the constant pool back where
+    // we left space for it.
+    uint32_t length = pm_sizet_to_u32(buffer->length);
+    memcpy(buffer->value + offset, &length, sizeof(uint32_t)); // copied as raw bytes, i.e. in host byte order
+
+    // Now we're going to serialize the constant pool.
+    offset = buffer->length; // from here on, offset is the start of the constant pool table
+    pm_buffer_append_zeroes(buffer, parser->constant_pool.size * 8); // 8 bytes per constant: 4 for an offset, 4 for a length
+
+    for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) {
+        pm_constant_pool_bucket_t *bucket = &parser->constant_pool.buckets[index];
+
+        // If we find a constant at this index, serialize it at the correct
+        // index in the buffer.
+        if (bucket->id != 0) { // buckets whose id is 0 are skipped
+            pm_constant_t *constant = &parser->constant_pool.constants[bucket->id - 1];
+            size_t buffer_offset = offset + ((((size_t)bucket->id) - 1) * 8); // ids are 1-based, so id N occupies table slot N - 1
+
+            if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED || bucket->type == PM_CONSTANT_POOL_BUCKET_CONSTANT) {
+                // Since this is an owned or constant constant, we are going to
+                // write its contents into the buffer after the constant pool.
+                // So effectively in place of the source offset, we have a
+                // buffer offset. We will add a leading 1 to indicate that this
+                // is a buffer offset.
+                uint32_t content_offset = pm_sizet_to_u32(buffer->length);
+                uint32_t owned_mask = 1U << 31;
+
+                assert(content_offset < owned_mask); // the top bit must be free to carry the flag
+                content_offset |= owned_mask;
+
+                memcpy(buffer->value + buffer_offset, &content_offset, 4);
+                pm_buffer_append_bytes(buffer, constant->start, constant->length); // the bytes land at the offset recorded just above
+            } else {
+                // Since this is a shared constant, we are going to write its
+                // source offset directly into the buffer.
+                uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start);
+                memcpy(buffer->value + buffer_offset, &source_offset, 4);
+            }
+
+            // Now we can write the length of the constant into the buffer.
+            uint32_t constant_length = pm_sizet_to_u32(constant->length);
+            memcpy(buffer->value + buffer_offset + 4, &constant_length, 4); // second half of the 8-byte slot
+        }
+    }
+}
+
+static void
+serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
+    pm_buffer_t *out = (pm_buffer_t *) data;
+    // Four varuints per token: type, offset from parser->start, length, parser->lex_state.
+    pm_buffer_append_varuint(out, token->type);
+    pm_buffer_append_varuint(out, pm_ptrdifft_to_u32(token->start - parser->start));
+    pm_buffer_append_varuint(out, pm_ptrdifft_to_u32(token->end - token->start));
+    pm_buffer_append_varuint(out, parser->lex_state);
+}
+
+/**
+ * Lexes the given source into buffer: the tokens, a terminating zero byte, and
+ * then the parser metadata. data is passed to pm_options_read to fill in the options.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
+    pm_options_t opts = { 0 };
+    pm_options_read(&opts, data);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, source, size, &opts);
+
+    // serialize_token is installed as the lex callback, with buffer as its data.
+    pm_lex_callback_t token_sink = { .data = (void *) buffer, .callback = serialize_token };
+    parser.lex_callback = &token_sink;
+
+    // The tree is not serialized here; it is kept only so it can be destroyed below.
+    pm_node_t *root = pm_parse(&parser);
+
+    // A zero byte marks the end of the tokens; the metadata follows it.
+    pm_buffer_append_byte(buffer, 0);
+    pm_serialize_metadata(&parser, buffer);
+
+    // Tear down in the reverse order of setup.
+    pm_node_destroy(&parser, root);
+    pm_parser_free(&parser);
+    pm_options_free(&opts);
+}
+
+/**
+ * Parses the given source with a lex callback installed, so that buffer holds
+ * the tokens, a zero byte, and then whatever pm_serialize writes for the tree.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
+    pm_options_t opts = { 0 };
+    pm_options_read(&opts, data);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, source, size, &opts);
+
+    // serialize_token is installed as the lex callback, with buffer as its data.
+    pm_lex_callback_t token_sink = { .data = (void *) buffer, .callback = serialize_token };
+    parser.lex_callback = &token_sink;
+
+    pm_node_t *root = pm_parse(&parser);
+
+    // A zero byte separates the tokens from the output of pm_serialize.
+    pm_buffer_append_byte(buffer, 0);
+    pm_serialize(&parser, root, buffer);
+
+    // Tear down in the reverse order of setup.
+    pm_node_destroy(&parser, root);
+    pm_parser_free(&parser);
+    pm_options_free(&opts);
+}
+
+#endif
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb
new file mode 100644
index 0000000000..f196393ee1
--- /dev/null
+++ b/prism/templates/src/token_type.c.erb
@@ -0,0 +1,369 @@
+#include <string.h>
+
+#include "prism/ast.h"
+
+/**
+ * Returns the name of the given token type, i.e. its enum name without the PM_TOKEN_ prefix.
+ */
+PRISM_EXPORTED_FUNCTION const char *
+pm_token_type_name(pm_token_type_t token_type) {
+    switch (token_type) {
+<%- tokens.each do |token| -%>
+        case PM_TOKEN_<%= token.name %>:
+            return "<%= token.name %>";
+<%- end -%>
+        case PM_TOKEN_MAXIMUM: // not one of the generated cases; asserts, and returns "" when asserts are compiled out
+            assert(false && "unreachable");
+            return "";
+    }
+
+    // Provide a default, because some compilers can't determine that the above
+    // switch is exhaustive.
+    assert(false && "unreachable");
+    return "";
+}
+
+/**
+ * Returns the human name of the given token type.
+ */
+const char *
+pm_token_type_human(pm_token_type_t token_type) {
+    switch (token_type) {
+        case PM_TOKEN_EOF:
+            return "end-of-input";
+        case PM_TOKEN_MISSING:
+            return "missing token";
+        case PM_TOKEN_NOT_PROVIDED:
+            return "not provided token";
+        case PM_TOKEN_AMPERSAND:
+            return "'&'";
+        case PM_TOKEN_AMPERSAND_AMPERSAND:
+            return "'&&'";
+        case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+            return "'&&='";
+        case PM_TOKEN_AMPERSAND_DOT:
+            return "'&.'";
+        case PM_TOKEN_AMPERSAND_EQUAL:
+            return "'&='";
+        case PM_TOKEN_BACKTICK:
+            return "'`'";
+        case PM_TOKEN_BACK_REFERENCE:
+            return "back reference";
+        case PM_TOKEN_BANG:
+            return "'!'";
+        case PM_TOKEN_BANG_EQUAL:
+            return "'!='";
+        case PM_TOKEN_BANG_TILDE:
+            return "'!~'";
+        case PM_TOKEN_BRACE_LEFT:
+            return "'{'";
+        case PM_TOKEN_BRACE_RIGHT:
+            return "'}'";
+        case PM_TOKEN_BRACKET_LEFT:
+            return "'['";
+        case PM_TOKEN_BRACKET_LEFT_ARRAY:
+            return "'['";
+        case PM_TOKEN_BRACKET_LEFT_RIGHT:
+            return "'[]'";
+        case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
+            return "'[]='";
+        case PM_TOKEN_BRACKET_RIGHT:
+            return "']'";
+        case PM_TOKEN_CARET:
+            return "'^'";
+        case PM_TOKEN_CARET_EQUAL:
+            return "'^='";
+        case PM_TOKEN_CHARACTER_LITERAL:
+            return "character literal";
+        case PM_TOKEN_CLASS_VARIABLE:
+            return "class variable";
+        case PM_TOKEN_COLON:
+            return "':'";
+        case PM_TOKEN_COLON_COLON:
+            return "'::'";
+        case PM_TOKEN_COMMA:
+            return "','";
+        case PM_TOKEN_COMMENT:
+            return "comment";
+        case PM_TOKEN_CONSTANT:
+            return "constant";
+        case PM_TOKEN_DOT:
+            return "'.'";
+        case PM_TOKEN_DOT_DOT:
+            return "..";
+        case PM_TOKEN_DOT_DOT_DOT:
+            return "...";
+        case PM_TOKEN_EMBDOC_BEGIN:
+            return "'=begin'";
+        case PM_TOKEN_EMBDOC_END:
+            return "'=end'";
+        case PM_TOKEN_EMBDOC_LINE:
+            return "embedded documentation line";
+        case PM_TOKEN_EMBEXPR_BEGIN:
+            return "'#{'";
+        case PM_TOKEN_EMBEXPR_END:
+            return "'}'";
+        case PM_TOKEN_EMBVAR:
+            return "'#'";
+        case PM_TOKEN_EQUAL:
+            return "'='";
+        case PM_TOKEN_EQUAL_EQUAL:
+            return "'=='";
+        case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+            return "'==='";
+        case PM_TOKEN_EQUAL_GREATER:
+            return "'=>'";
+        case PM_TOKEN_EQUAL_TILDE:
+            return "'=~'";
+        case PM_TOKEN_FLOAT:
+            return "float";
+        case PM_TOKEN_FLOAT_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_FLOAT_RATIONAL:
+            return "rational";
+        case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_GLOBAL_VARIABLE:
+            return "global variable";
+        case PM_TOKEN_GREATER:
+            return "'>'";
+        case PM_TOKEN_GREATER_EQUAL:
+            return "'>='";
+        case PM_TOKEN_GREATER_GREATER:
+            return ">>";
+        case PM_TOKEN_GREATER_GREATER_EQUAL:
+            return ">>=";
+        case PM_TOKEN_HEREDOC_END:
+            return "heredoc ending";
+        case PM_TOKEN_HEREDOC_START:
+            return "heredoc beginning";
+        case PM_TOKEN_IDENTIFIER:
+            return "local variable or method";
+        case PM_TOKEN_IGNORED_NEWLINE:
+            return "ignored newline";
+        case PM_TOKEN_INSTANCE_VARIABLE:
+            return "instance variable";
+        case PM_TOKEN_INTEGER:
+            return "integer";
+        case PM_TOKEN_INTEGER_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_INTEGER_RATIONAL:
+            return "rational";
+        case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
+            return "imaginary";
+        case PM_TOKEN_KEYWORD_ALIAS:
+            return "'alias'";
+        case PM_TOKEN_KEYWORD_AND:
+            return "'and'";
+        case PM_TOKEN_KEYWORD_BEGIN:
+            return "'begin'";
+        case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
+            return "'BEGIN'";
+        case PM_TOKEN_KEYWORD_BREAK:
+            return "'break'";
+        case PM_TOKEN_KEYWORD_CASE:
+            return "'case'";
+        case PM_TOKEN_KEYWORD_CLASS:
+            return "'class'";
+        case PM_TOKEN_KEYWORD_DEF:
+            return "'def'";
+        case PM_TOKEN_KEYWORD_DEFINED:
+            return "'defined?'";
+        case PM_TOKEN_KEYWORD_DO:
+            return "'do'";
+        case PM_TOKEN_KEYWORD_DO_LOOP:
+            return "'do'";
+        case PM_TOKEN_KEYWORD_ELSE:
+            return "'else'";
+        case PM_TOKEN_KEYWORD_ELSIF:
+            return "'elsif'";
+        case PM_TOKEN_KEYWORD_END:
+            return "'end'";
+        case PM_TOKEN_KEYWORD_END_UPCASE:
+            return "'END'";
+        case PM_TOKEN_KEYWORD_ENSURE:
+            return "'ensure'";
+        case PM_TOKEN_KEYWORD_FALSE:
+            return "'false'";
+        case PM_TOKEN_KEYWORD_FOR:
+            return "'for'";
+        case PM_TOKEN_KEYWORD_IF:
+            return "'if'";
+        case PM_TOKEN_KEYWORD_IF_MODIFIER:
+            return "'if'";
+        case PM_TOKEN_KEYWORD_IN:
+            return "'in'";
+        case PM_TOKEN_KEYWORD_MODULE:
+            return "'module'";
+        case PM_TOKEN_KEYWORD_NEXT:
+            return "'next'";
+        case PM_TOKEN_KEYWORD_NIL:
+            return "'nil'";
+        case PM_TOKEN_KEYWORD_NOT:
+            return "'not'";
+        case PM_TOKEN_KEYWORD_OR:
+            return "'or'";
+        case PM_TOKEN_KEYWORD_REDO:
+            return "'redo'";
+        case PM_TOKEN_KEYWORD_RESCUE:
+            return "'rescue'";
+        case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
+            return "'rescue' modifier";
+        case PM_TOKEN_KEYWORD_RETRY:
+            return "'retry'";
+        case PM_TOKEN_KEYWORD_RETURN:
+            return "'return'";
+        case PM_TOKEN_KEYWORD_SELF:
+            return "'self'";
+        case PM_TOKEN_KEYWORD_SUPER:
+            return "'super'";
+        case PM_TOKEN_KEYWORD_THEN:
+            return "'then'";
+        case PM_TOKEN_KEYWORD_TRUE:
+            return "'true'";
+        case PM_TOKEN_KEYWORD_UNDEF:
+            return "'undef'";
+        case PM_TOKEN_KEYWORD_UNLESS:
+            return "'unless'";
+        case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
+            return "'unless'";
+        case PM_TOKEN_KEYWORD_UNTIL:
+            return "'until'";
+        case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
+            return "'until'";
+        case PM_TOKEN_KEYWORD_WHEN:
+            return "'when'";
+        case PM_TOKEN_KEYWORD_WHILE:
+            return "'while'";
+        case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
+            return "'while'";
+        case PM_TOKEN_KEYWORD_YIELD:
+            return "'yield'";
+        case PM_TOKEN_KEYWORD___ENCODING__:
+            return "'__ENCODING__'";
+        case PM_TOKEN_KEYWORD___FILE__:
+            return "'__FILE__'";
+        case PM_TOKEN_KEYWORD___LINE__:
+            return "'__LINE__'";
+        case PM_TOKEN_LABEL:
+            return "label";
+        case PM_TOKEN_LABEL_END:
+            return "label terminator";
+        case PM_TOKEN_LAMBDA_BEGIN:
+            return "'{'";
+        case PM_TOKEN_LESS:
+            return "'<'";
+        case PM_TOKEN_LESS_EQUAL:
+            return "'<='";
+        case PM_TOKEN_LESS_EQUAL_GREATER:
+            return "'<=>'";
+        case PM_TOKEN_LESS_LESS:
+            return "<<";
+        case PM_TOKEN_LESS_LESS_EQUAL:
+            return "<<=";
+        case PM_TOKEN_METHOD_NAME:
+            return "method name";
+        case PM_TOKEN_MINUS:
+            return "'-'";
+        case PM_TOKEN_MINUS_EQUAL:
+            return "'-='";
+        case PM_TOKEN_MINUS_GREATER:
+            return "'->'";
+        case PM_TOKEN_NEWLINE:
+            return "newline";
+        case PM_TOKEN_NUMBERED_REFERENCE:
+            return "numbered reference";
+        case PM_TOKEN_PARENTHESIS_LEFT:
+            return "'('";
+        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+            return "'('";
+        case PM_TOKEN_PARENTHESIS_RIGHT:
+            return "')'";
+        case PM_TOKEN_PERCENT:
+            return "'%'";
+        case PM_TOKEN_PERCENT_EQUAL:
+            return "'%='";
+        case PM_TOKEN_PERCENT_LOWER_I:
+            return "'%i'";
+        case PM_TOKEN_PERCENT_LOWER_W:
+            return "'%w'";
+        case PM_TOKEN_PERCENT_LOWER_X:
+            return "'%x'";
+        case PM_TOKEN_PERCENT_UPPER_I:
+            return "'%I'";
+        case PM_TOKEN_PERCENT_UPPER_W:
+            return "'%W'";
+        case PM_TOKEN_PIPE:
+            return "'|'";
+        case PM_TOKEN_PIPE_EQUAL:
+            return "'|='";
+        case PM_TOKEN_PIPE_PIPE:
+            return "'||'";
+        case PM_TOKEN_PIPE_PIPE_EQUAL:
+            return "'||='";
+        case PM_TOKEN_PLUS:
+            return "'+'";
+        case PM_TOKEN_PLUS_EQUAL:
+            return "'+='";
+        case PM_TOKEN_QUESTION_MARK:
+            return "'?'";
+        case PM_TOKEN_REGEXP_BEGIN:
+            return "regular expression beginning";
+        case PM_TOKEN_REGEXP_END:
+            return "regular expression ending";
+        case PM_TOKEN_SEMICOLON:
+            return "';'";
+        case PM_TOKEN_SLASH:
+            return "'/'";
+        case PM_TOKEN_SLASH_EQUAL:
+            return "'/='";
+        case PM_TOKEN_STAR:
+            return "'*'";
+        case PM_TOKEN_STAR_EQUAL:
+            return "'*='";
+        case PM_TOKEN_STAR_STAR:
+            return "'**'";
+        case PM_TOKEN_STAR_STAR_EQUAL:
+            return "'**='";
+        case PM_TOKEN_STRING_BEGIN:
+            return "string literal";
+        case PM_TOKEN_STRING_CONTENT:
+            return "string content";
+        case PM_TOKEN_STRING_END:
+            return "string ending";
+        case PM_TOKEN_SYMBOL_BEGIN:
+            return "symbol literal";
+        case PM_TOKEN_TILDE:
+            return "'~'";
+        case PM_TOKEN_UAMPERSAND:
+            return "'&'";
+        case PM_TOKEN_UCOLON_COLON:
+            return "'::'";
+        case PM_TOKEN_UDOT_DOT:
+            return "'..'";
+        case PM_TOKEN_UDOT_DOT_DOT:
+            return "'...'";
+        case PM_TOKEN_UMINUS:
+            return "'-'";
+        case PM_TOKEN_UMINUS_NUM:
+            return "'-'";
+        case PM_TOKEN_UPLUS:
+            return "'+'";
+        case PM_TOKEN_USTAR:
+            return "*";
+        case PM_TOKEN_USTAR_STAR:
+            return "**";
+        case PM_TOKEN_WORDS_SEP:
+            return "string separator";
+        case PM_TOKEN___END__:
+            return "'__END__'";
+        case PM_TOKEN_MAXIMUM:
+            assert(false && "unreachable");
+            return "";
+    }
+
+    // Provide a default, because some compilers can't determine that the above
+    // switch is exhaustive.
+    assert(false && "unreachable");
+    return "";
+}
diff --git a/prism/templates/template.rb b/prism/templates/template.rb
new file mode 100755
index 0000000000..6c3efd7e6c
--- /dev/null
+++ b/prism/templates/template.rb
@@ -0,0 +1,689 @@
+#!/usr/bin/env ruby
+# typed: ignore
+
+require "erb"
+require "fileutils"
+require "yaml"
+
+module Prism
+  module Template
+    SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
+    REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS
+    CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
+
+    JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
+    JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
+    INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
+
+    COMMON_FLAGS_COUNT = 2
+
+    # Wraps the name of one entry of the "errors" list in config.yml so that
+    # templates can refer to it through #name.
+    class Error
+      attr_reader :name
+
+      def initialize(name)
+        @name = name
+      end
+    end
+
+    # Wraps the name of one entry of the "warnings" list in config.yml so that
+    # templates can refer to it through #name.
+    class Warning
+      attr_reader :name
+
+      def initialize(name)
+        @name = name
+      end
+    end
+
+    # This module contains methods for escaping characters in JavaDoc comments.
+    module JavaDoc
+      ESCAPES = {
+        "'" => "&#39;",
+        "\"" => "&quot;",
+        "@" => "&#64;",
+        "&" => "&amp;",
+        "<" => "&lt;",
+        ">" => "&gt;"
+      }.freeze
+
+      def self.escape(value)
+        value.gsub(/['&"<>@]/, ESCAPES)
+      end
+    end
+
+    # A comment attached to a field or node.
+    class ConfigComment
+      attr_reader :value
+
+      def initialize(value)
+        @value = value
+      end
+
+      def each_line(&block)
+        value.each_line { |line| yield line.prepend(" ").rstrip }
+      end
+
+      def each_java_line(&block)
+        ConfigComment.new(JavaDoc.escape(value)).each_line(&block)
+      end
+    end
+
+    # This represents a field on a node. It contains all of the necessary
+    # information to template out the code for that field.
+    class Field
+      attr_reader :name, :comment, :options
+
+      def initialize(name:, comment: nil, **options)
+        @name = name
+        @comment = comment
+        @options = options
+      end
+
+      def each_comment_line(&block)
+        ConfigComment.new(comment).each_line(&block) if comment
+      end
+
+      def each_comment_java_line(&block)
+        ConfigComment.new(comment).each_java_line(&block) if comment
+      end
+
+      def semantic_field?
+        true
+      end
+
+      def should_be_serialized?
+        SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true
+      end
+    end
+
+    # Some node fields can be specialized if they point to a specific kind of
+    # node and not just a generic node.
+    class NodeKindField < Field
+      def initialize(kind:, **options)
+        @kind = kind
+        super(**options)
+      end
+
+      def c_type
+        if specific_kind
+          "pm_#{specific_kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}"
+        else
+          "pm_node"
+        end
+      end
+
+      def ruby_type
+        specific_kind || "Node"
+      end
+
+      def java_type
+        specific_kind || "Node"
+      end
+
+      def java_cast
+        if specific_kind
+          "(Nodes.#{@kind}) "
+        else
+          ""
+        end
+      end
+
+      def specific_kind
+        @kind unless @kind.is_a?(Array)
+      end
+
+      def union_kind
+        @kind if @kind.is_a?(Array)
+      end
+    end
+
+    # This represents a field on a node that is itself a node. We pass them as
+    # references and store them as references.
+    class NodeField < NodeKindField
+      def rbs_class
+        if specific_kind
+          specific_kind
+        elsif union_kind
+          union_kind.join(" | ")
+        else
+          "Prism::node"
+        end
+      end
+
+      def rbi_class
+        if specific_kind
+          "Prism::#{specific_kind}"
+        elsif union_kind
+          "T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})"
+        else
+          "Prism::Node"
+        end
+      end
+
+      def check_field_kind
+        if union_kind
+          "[#{union_kind.join(', ')}].include?(#{name}.class)"
+        else
+          "#{name}.is_a?(#{ruby_type})"
+        end
+      end
+    end
+
+    # This represents a field on a node that is itself a node and can be
+    # optionally null. We pass them as references and store them as references.
+    class OptionalNodeField < NodeKindField
+      def rbs_class
+        if specific_kind
+          "#{specific_kind}?"
+        elsif union_kind
+          [*union_kind, "nil"].join(" | ")
+        else
+          "Prism::node?"
+        end
+      end
+
+      def rbi_class
+        if specific_kind
+          "T.nilable(Prism::#{specific_kind})"
+        elsif union_kind
+          "T.nilable(T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")}))"
+        else
+          "T.nilable(Prism::Node)"
+        end
+      end
+
+      def check_field_kind
+        if union_kind
+          "[#{union_kind.join(', ')}, NilClass].include?(#{name}.class)"
+        else
+          "#{name}.nil? || #{name}.is_a?(#{ruby_type})"
+        end
+      end
+    end
+
+    # This represents a field on a node that is a list of nodes. We pass them as
+    # references and store them directly on the struct.
+    class NodeListField < NodeKindField
+      def rbs_class
+        if specific_kind
+          "Array[#{specific_kind}]"
+        elsif union_kind
+          "Array[#{union_kind.join(" | ")}]"
+        else
+          "Array[Prism::node]"
+        end
+      end
+
+      def rbi_class
+        if specific_kind
+          "T::Array[Prism::#{specific_kind}]"
+        elsif union_kind
+          "T::Array[T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})]"
+        else
+          "T::Array[Prism::Node]"
+        end
+      end
+
+      def java_type
+        "#{super}[]"
+      end
+
+      def check_field_kind
+        if union_kind
+          "#{name}.all? { |n| [#{union_kind.join(', ')}].include?(n.class) }"
+        else
+          "#{name}.all? { |n| n.is_a?(#{ruby_type}) }"
+        end
+      end
+    end
+
+    # This represents a field on a node that is the ID of a string interned
+    # through the parser's constant pool. It is the field class selected for
+    # the "constant" type in config.yml.
+    class ConstantField < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Symbol"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "Symbol"
+      end
+
+      # Java type; depends on the configured Java backend.
+      def java_type
+        JAVA_STRING_TYPE
+      end
+    end
+
+    # This represents a field on a node that is the ID of a string interned
+    # through the parser's constant pool and can be optionally null. It is the
+    # field class selected for the "constant?" type in config.yml.
+    class OptionalConstantField < Field
+      # Type used in the generated RBS signatures (nilable).
+      def rbs_class
+        "Symbol?"
+      end
+
+      # Type used in the generated RBI signatures (nilable).
+      def rbi_class
+        "T.nilable(Symbol)"
+      end
+
+      # Java type; the same type as the non-optional constant field.
+      def java_type
+        JAVA_STRING_TYPE
+      end
+    end
+
+    # This represents a field on a node that is a list of IDs that are associated
+    # with strings interned through the parser's constant pool. It is the field
+    # class selected for the "constant[]" type in config.yml.
+    class ConstantListField < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Array[Symbol]"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "T::Array[Symbol]"
+      end
+
+      # Java array of the backend-dependent constant type.
+      def java_type
+        "#{JAVA_STRING_TYPE}[]"
+      end
+    end
+
+    # This represents a field on a node that is a string. It is the field class
+    # selected for the "string" type in config.yml.
+    class StringField < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "String"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "String"
+      end
+
+      # On the Java side the string is exposed as raw bytes.
+      def java_type
+        "byte[]"
+      end
+    end
+
+    # This represents a field on a node that is a location. It is the field
+    # class selected for the "location" type in config.yml.
+    class LocationField < Field
+      # Locations are not semantic, so Field#should_be_serialized? is false for
+      # them when SERIALIZE_ONLY_SEMANTICS_FIELDS is set.
+      def semantic_field?
+        false
+      end
+
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Location"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "Prism::Location"
+      end
+
+      # Type used in the generated Java sources.
+      def java_type
+        "Location"
+      end
+    end
+
+    # This represents a field on a node that is a location that is optional. It
+    # is the field class selected for the "location?" type in config.yml.
+    class OptionalLocationField < Field
+      # Locations are not semantic, so Field#should_be_serialized? is false for
+      # them when SERIALIZE_ONLY_SEMANTICS_FIELDS is set.
+      def semantic_field?
+        false
+      end
+
+      # Type used in the generated RBS signatures (nilable).
+      def rbs_class
+        "Location?"
+      end
+
+      # Type used in the generated RBI signatures (nilable).
+      def rbi_class
+        "T.nilable(Prism::Location)"
+      end
+
+      # Java type; the same type as the non-optional location field.
+      def java_type
+        "Location"
+      end
+    end
+
+    # This represents an integer field. It is the field class selected for the
+    # "uint8" type in config.yml.
+    class UInt8Field < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Integer"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "Integer"
+      end
+
+      # Type used in the generated Java sources.
+      def java_type
+        "int"
+      end
+    end
+
+    # This represents an integer field. It is the field class selected for the
+    # "uint32" type in config.yml.
+    class UInt32Field < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Integer"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "Integer"
+      end
+
+      # Type used in the generated Java sources.
+      def java_type
+        "int"
+      end
+    end
+
+    # This represents an arbitrarily-sized integer. When it gets to Ruby it will
+    # be an Integer. It is the field class selected for the "integer" type in
+    # config.yml.
+    class IntegerField < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Integer"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "Integer"
+      end
+
+      # Java has no single primitive for this, so the field is typed as Object.
+      def java_type
+        "Object"
+      end
+    end
+
+    # This represents a double-precision floating point number. When it gets to
+    # Ruby it will be a Float. It is the field class selected for the "double"
+    # type in config.yml.
+    class DoubleField < Field
+      # Type used in the generated RBS signatures.
+      def rbs_class
+        "Float"
+      end
+
+      # Type used in the generated RBI signatures.
+      def rbi_class
+        "Float"
+      end
+
+      # Type used in the generated Java sources.
+      def java_type
+        "double"
+      end
+    end
+
+    # This class represents a node in the tree, configured by the config.yml file
+    # in YAML format. It contains information about the name of the node and the
+    # various child nodes it contains.
+    class NodeType
+      attr_reader :name, :type, :human, :flags, :fields, :newline, :comment
+
+      # config is the Hash for one node entry; flags maps a flags name to its
+      # Flags object. Raises KeyError when "name" or "comment" is missing (or
+      # when a referenced flags name is unknown), and RuntimeError for an
+      # unknown field type or a node-typed field without a "kind".
+      def initialize(config, flags)
+        @name = config.fetch("name")
+
+        # Insert an underscore before every capital letter except the first,
+        # e.g. "DefNode" becomes "Def_Node", then derive both spellings.
+        type = @name.gsub(/(?<=.)[A-Z]/, "_\\0")
+        @type = "PM_#{type.upcase}"
+        @human = type.downcase
+
+        @fields =
+          config.fetch("fields", []).map do |field|
+            # NOTE: this reassigns the outer local `type`; that is harmless only
+            # because @type and @human have already been computed above.
+            type = field_type_for(field.fetch("type"))
+
+            options = field.transform_keys(&:to_sym)
+            options.delete(:type)
+
+            # If/when we have documentation on every field, this should be
+            # changed to use fetch instead of delete.
+            comment = options.delete(:comment)
+
+            if kinds = options[:kind]
+              # Normalize to an Array so a single kind and a union share a path.
+              kinds = [kinds] unless kinds.is_a?(Array)
+              kinds = kinds.map do |kind|
+                case kind
+                when "non-void expression"
+                  # the actual list of types would be way too long
+                  "Node"
+                when "pattern expression"
+                  # the list of all possible types is too long with 37+ different classes
+                  "Node"
+                when Hash
+                  # A Hash entry names a kind under its "on error" key; it is
+                  # dropped entirely when REMOVE_ON_ERROR_TYPES is set.
+                  kind = kind.fetch("on error")
+                  REMOVE_ON_ERROR_TYPES ? nil : kind
+                else
+                  kind
+                end
+              end.compact
+              # A single remaining kind is unwrapped from the Array, and the
+              # generic "Node" is represented as nil (no specific kind).
+              if kinds.size == 1
+                kinds = kinds.first
+                kinds = nil if kinds == "Node"
+              end
+              options[:kind] = kinds
+            else
+              # Node-typed fields must always declare what they can contain.
+              if type < NodeKindField
+                raise "Missing kind in config.yml for field #{@name}##{options.fetch(:name)}"
+              end
+            end
+
+            type.new(comment: comment, **options)
+          end
+
+        # nil when the node declares no flags; KeyError for an unknown name.
+        @flags = config.key?("flags") ? flags.fetch(config.fetch("flags")) : nil
+        @newline = config.fetch("newline", true)
+        @comment = config.fetch("comment")
+      end
+
+      # Yields each formatted line of the node's comment.
+      def each_comment_line(&block)
+        ConfigComment.new(comment).each_line(&block)
+      end
+
+      # Yields each JavaDoc-escaped line of the node's comment.
+      def each_comment_java_line(&block)
+        ConfigComment.new(comment).each_java_line(&block)
+      end
+
+      # The fields whose semantic_field? is true (memoized).
+      def semantic_fields
+        @semantic_fields ||= @fields.select(&:semantic_field?)
+      end
+
+      # Should emit serialized length of node so implementations can skip
+      # the node to enable lazy parsing.
+      def needs_serialized_length?
+        name == "DefNode"
+      end
+
+      private
+
+      # Maps a field type name from config.yml to the class that models it.
+      # Raises RuntimeError for a name that is not listed here.
+      def field_type_for(name)
+        case name
+        when "node"       then NodeField
+        when "node?"      then OptionalNodeField
+        when "node[]"     then NodeListField
+        when "string"     then StringField
+        when "constant"   then ConstantField
+        when "constant?"  then OptionalConstantField
+        when "constant[]" then ConstantListField
+        when "location"   then LocationField
+        when "location?"  then OptionalLocationField
+        when "uint8"      then UInt8Field
+        when "uint32"     then UInt32Field
+        when "integer"    then IntegerField
+        when "double"     then DoubleField
+        else raise("Unknown field type: #{name.inspect}")
+        end
+      end
+    end
+
+    # This represents a token in the lexer, built from one entry of the
+    # "tokens" list in config.yml.
+    class Token
+      attr_reader :name, :value, :comment
+
+      # "name" and "comment" are required (KeyError when missing); "value" is
+      # optional and is nil when the entry does not specify one.
+      def initialize(config)
+        @name = config.fetch("name")
+        @value = config["value"]
+        @comment = config.fetch("comment")
+      end
+    end
+
+    # Represents a set of flags that should be internally represented with an enum.
+    class Flags
+      # Represents an individual flag within a set of flags.
+      class Flag
+        attr_reader :name, :camelcase, :comment
+
+        # "name" and "comment" are required keys (KeyError when missing).
+        def initialize(config)
+          @name = config.fetch("name")
+          # Split on underscores and capitalize each part, e.g. "FOO_BAR"
+          # becomes "FooBar".
+          @camelcase = @name.split("_").map(&:capitalize).join
+          @comment = config.fetch("comment")
+        end
+      end
+
+      attr_reader :name, :human, :values, :comment
+
+      # "name", "values" and "comment" are required keys (KeyError when
+      # missing); every entry of "values" becomes a Flag.
+      def initialize(config)
+        @name = config.fetch("name")
+        # snake_case spelling of the name: an underscore is inserted before
+        # every capital letter except the first, then everything is downcased.
+        @human = @name.gsub(/(?<=.)[A-Z]/, "_\\0").downcase
+        @values = config.fetch("values").map { |flag| Flag.new(flag) }
+        @comment = config.fetch("comment")
+      end
+
+      # A Flags set with an empty name, no values and an empty comment.
+      def self.empty
+        new("name" => "", "values" => [], "comment" => "")
+      end
+    end
+
+    class << self
+      # This templates out a file using ERB with the given locals. The locals are
+      # derived from the config.yml file.
+      #
+      # name is the output path relative to the directory above this one; the
+      # template is read from templates/<name>.erb under that same directory.
+      # write_to overrides where the result is written. The generated contents
+      # are prefixed with a "generated file" heading chosen by file extension.
+      # Raises RuntimeError when a .c or .h result contains a non-ASCII character.
+      def render(name, write_to: nil)
+        filepath = "templates/#{name}.erb"
+        template = File.expand_path("../#{filepath}", __dir__)
+
+        erb = read_template(template)
+        # Extension of the output file, i.e. with the ".erb" part removed.
+        extension = File.extname(filepath.gsub(".erb", ""))
+
+        heading =
+          case extension
+          when ".rb"
+            <<~HEADING
+            # frozen_string_literal: true
+            # :markup: markdown
+
+            =begin
+            --
+            This file is generated by the templates/template.rb script and should not be
+            modified manually. See #{filepath}
+            if you are looking to modify the template
+            ++
+            =end
+
+            HEADING
+          when ".rbs"
+            <<~HEADING
+            # This file is generated by the templates/template.rb script and should not be
+            # modified manually. See #{filepath}
+            # if you are looking to modify the template
+
+            HEADING
+          when ".rbi"
+            <<~HEADING
+            # typed: strict
+
+            =begin
+            This file is generated by the templates/template.rb script and should not be
+            modified manually. See #{filepath}
+            if you are looking to modify the template
+            =end
+
+            HEADING
+          else
+            <<~HEADING
+            /* :markup: markdown */
+
+            /*----------------------------------------------------------------------------*/
+            /* This file is generated by the templates/template.rb script and should not  */
+            /* be modified manually. See                                                  */
+            /* #{filepath.ljust(74)} */
+            /* if you are looking to modify the                                           */
+            /* template                                                                   */
+            /*----------------------------------------------------------------------------*/
+
+            HEADING
+          end
+
+        # Default destination: <name> relative to the directory above this one.
+        write_to ||= File.expand_path("../#{name}", __dir__)
+        contents = heading + erb.result_with_hash(locals)
+
+        if (extension == ".c" || extension == ".h") && !contents.ascii_only?
+          # Enforce that we only have ASCII characters here. This is necessary
+          # for non-UTF-8 locales that only allow ASCII characters in C source
+          # files.
+          contents.each_line.with_index(1) do |line, line_number|
+            raise "Non-ASCII character on line #{line_number} of #{write_to}" unless line.ascii_only?
+          end
+        end
+
+        FileUtils.mkdir_p(File.dirname(write_to))
+        File.write(write_to, contents)
+      end
+
+      private
+
+      # Reads the template as UTF-8 and returns an ERB object whose filename is
+      # set to the template path.
+      def read_template(filepath)
+        template = File.read(filepath, encoding: Encoding::UTF_8)
+        erb = erb(template)
+        erb.filename = filepath
+        erb
+      end
+
+      # Builds the ERB object; trim mode "-" enables the <%- and -%> tags.
+      def erb(template)
+        ERB.new(template, trim_mode: "-")
+      end
+
+      # The Hash of template locals built from config.yml. It is loaded once
+      # and memoized, so every render call shares the same objects.
+      def locals
+        @locals ||=
+          begin
+            config = YAML.load_file(File.expand_path("../config.yml", __dir__))
+            # Index the flag sets by name so NodeType can look its flags up.
+            flags = config.fetch("flags").to_h { |flags| [flags["name"], Flags.new(flags)] }
+
+            {
+              errors: config.fetch("errors").map { |name| Error.new(name) },
+              warnings: config.fetch("warnings").map { |name| Warning.new(name) },
+              nodes: config.fetch("nodes").map { |node| NodeType.new(node, flags) }.sort_by(&:name),
+              tokens: config.fetch("tokens").map { |token| Token.new(token) },
+              flags: flags.values
+            }
+          end
+      end
+    end
+
+    # Every file rendered when this script is run without arguments. Each
+    # entry is passed to render as its name, so it is both the output path and
+    # (with "templates/" prepended and ".erb" appended) the template path.
+    TEMPLATES = [
+      "ext/prism/api_node.c",
+      "include/prism/ast.h",
+      "include/prism/diagnostic.h",
+      "javascript/src/deserialize.js",
+      "javascript/src/nodes.js",
+      "javascript/src/visitor.js",
+      "java/org/prism/Loader.java",
+      "java/org/prism/Nodes.java",
+      "java/org/prism/AbstractNodeVisitor.java",
+      "lib/prism/compiler.rb",
+      "lib/prism/dispatcher.rb",
+      "lib/prism/dot_visitor.rb",
+      "lib/prism/dsl.rb",
+      "lib/prism/inspect_visitor.rb",
+      "lib/prism/mutation_compiler.rb",
+      "lib/prism/node.rb",
+      "lib/prism/reflection.rb",
+      "lib/prism/serialize.rb",
+      "lib/prism/visitor.rb",
+      "src/diagnostic.c",
+      "src/node.c",
+      "src/prettyprint.c",
+      "src/serialize.c",
+      "src/token_type.c",
+      "rbi/prism/dsl.rbi",
+      "rbi/prism/node.rbi",
+      "rbi/prism/visitor.rbi",
+      "sig/prism.rbs",
+      "sig/prism/dsl.rbs",
+      "sig/prism/mutation_compiler.rbs",
+      "sig/prism/node.rbs",
+      "sig/prism/visitor.rbs",
+      "sig/prism/_private/dot_visitor.rbs"
+    ]
+  end
+end
+
+# Only act when this file is executed directly, not when it is required.
+if __FILE__ == $0
+  if ARGV.empty?
+    # No arguments: render every known template to its default location.
+    Prism::Template::TEMPLATES.each { |filepath| Prism::Template.render(filepath) }
+  else # ruby/ruby
+    # Arguments: the template name and, optionally, the output path. A missing
+    # second argument leaves write_to nil, which selects the default location.
+    name, write_to = ARGV
+    Prism::Template.render(name, write_to: write_to)
+  end
+end
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
new file mode 100644
index 0000000000..2136a7c43e
--- /dev/null
+++ b/prism/util/pm_buffer.c
@@ -0,0 +1,357 @@
+#include "prism/util/pm_buffer.h"
+
+/**
+ * Return the size of the pm_buffer_t struct.
+ *
+ * @return sizeof(pm_buffer_t), in bytes.
+ */
+size_t
+pm_buffer_sizeof(void) {
+    return sizeof(pm_buffer_t);
+}
+
+/**
+ * Initialize a pm_buffer_t with the given capacity.
+ *
+ * The buffer's previous contents (if any) are overwritten without being
+ * freed, so this must only be called on a buffer that owns no memory.
+ *
+ * @param buffer The buffer to initialize.
+ * @param capacity The number of bytes to allocate up front.
+ * @return true if the allocation returned a non-NULL pointer, false otherwise.
+ *     On failure the buffer is left empty with a capacity of 0.
+ */
+bool
+pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
+    buffer->length = 0;
+    buffer->value = xmalloc(capacity);
+
+    // Only advertise capacity that is actually backed by memory. Otherwise a
+    // later append that fits within the claimed capacity would skip the
+    // reallocation and write through a NULL pointer.
+    buffer->capacity = (buffer->value != NULL) ? capacity : 0;
+    return buffer->value != NULL;
+}
+
+/**
+ * Initialize a pm_buffer_t with its default values.
+ *
+ * This is pm_buffer_init_capacity() with an initial capacity of 1024 bytes.
+ *
+ * @param buffer The buffer to initialize.
+ * @return Whatever pm_buffer_init_capacity() returns for that capacity.
+ */
+bool
+pm_buffer_init(pm_buffer_t *buffer) {
+    return pm_buffer_init_capacity(buffer, 1024);
+}
+
+/**
+ * Return the value of the buffer.
+ *
+ * @param buffer The buffer to read from.
+ * @return The buffer's current storage pointer. Appending to the buffer may
+ *     reallocate the storage, so the pointer should not be held across appends.
+ */
+char *
+pm_buffer_value(const pm_buffer_t *buffer) {
+    return buffer->value;
+}
+
+/**
+ * Return the length of the buffer.
+ *
+ * @param buffer The buffer to read from.
+ * @return The number of bytes currently stored in the buffer.
+ */
+size_t
+pm_buffer_length(const pm_buffer_t *buffer) {
+    return buffer->length;
+}
+
+/**
+ * Append the given amount of space to the buffer.
+ *
+ * Grows the storage (by doubling the capacity) when needed and advances the
+ * buffer's length by `length`. The new bytes are left uninitialized; the
+ * caller is expected to fill them in.
+ *
+ * @param buffer The buffer to grow.
+ * @param length The number of bytes to reserve at the end of the buffer.
+ * @return true if the space was reserved. false if the requested size cannot
+ *     be represented or the reallocation failed, in which case the buffer is
+ *     left exactly as it was.
+ */
+static inline bool
+pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
+    // Refuse requests whose total size would wrap around size_t.
+    if (length > SIZE_MAX - buffer->length) return false;
+    size_t next_length = buffer->length + length;
+
+    if (next_length > buffer->capacity) {
+        size_t next_capacity = (buffer->capacity == 0) ? 1 : buffer->capacity;
+
+        while (next_length > next_capacity) {
+            if (next_capacity > SIZE_MAX / 2) {
+                // Doubling again would wrap to a smaller value (and could
+                // loop forever), so request exactly what is needed instead.
+                next_capacity = next_length;
+                break;
+            }
+            next_capacity *= 2;
+        }
+
+        // Keep the old pointer and capacity until the reallocation is known
+        // to have succeeded, so a failure neither leaks the storage nor
+        // leaves the buffer claiming capacity it does not have.
+        char *next_value = xrealloc(buffer->value, next_capacity);
+        if (next_value == NULL) return false;
+
+        buffer->value = next_value;
+        buffer->capacity = next_capacity;
+    }
+
+    buffer->length = next_length;
+    return true;
+}
+
+/**
+ * Copy `length` bytes from `source` onto the end of the buffer. Nothing is
+ * copied when the space could not be reserved.
+ */
+static inline void
+pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
+    // Remember where the new bytes start before the length is advanced.
+    const size_t offset = buffer->length;
+    if (!pm_buffer_append_length(buffer, length)) return;
+
+    memcpy(buffer->value + offset, source, length);
+}
+
+/**
+ * Extend the buffer by `length` bytes, all set to zero. Nothing is written
+ * when the space could not be reserved.
+ */
+void
+pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
+    // Remember where the new bytes start before the length is advanced.
+    const size_t offset = buffer->length;
+    if (!pm_buffer_append_length(buffer, length)) return;
+
+    memset(buffer->value + offset, 0, length);
+}
+
+/**
+ * Append a formatted string to the buffer.
+ *
+ * The text is formatted twice: once to measure it and once to write it. The
+ * terminating NUL written by vsnprintf is stored after the text but is not
+ * counted in the buffer's length. Nothing is appended when formatting fails
+ * or the space cannot be reserved.
+ *
+ * @param buffer The buffer to append to.
+ * @param format A printf-style format string.
+ * @param ... The arguments consumed by the format string.
+ */
+void
+pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) {
+    va_list arguments;
+    va_start(arguments, format);
+    int result = vsnprintf(NULL, 0, format, arguments);
+    va_end(arguments);
+
+    if (result < 0) return;
+
+    // Widen before adding the byte for the NUL terminator: adding in int
+    // would overflow (undefined behavior) when result is INT_MAX.
+    size_t length = (size_t) result + 1;
+
+    size_t cursor = buffer->length;
+    if (pm_buffer_append_length(buffer, length)) {
+        // A va_list cannot be reused after va_end, so start it again.
+        va_start(arguments, format);
+        vsnprintf(buffer->value + cursor, length, format, arguments);
+        va_end(arguments);
+
+        // Do not count the NUL terminator as part of the contents.
+        buffer->length--;
+    }
+}
+
+/**
+ * Append a string to the buffer.
+ *
+ * Exactly `length` bytes are copied; no NUL terminator is looked for or added.
+ *
+ * @param buffer The buffer to append to.
+ * @param value A pointer to the first byte to copy.
+ * @param length The number of bytes to copy.
+ */
+void
+pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length) {
+    pm_buffer_append(buffer, value, length);
+}
+
+/**
+ * Append a list of bytes to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value A pointer to the first byte to copy.
+ * @param length The number of bytes to copy.
+ */
+void
+pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
+    pm_buffer_append(buffer, (const char *) value, length);
+}
+
+/**
+ * Append a single byte to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The byte to append.
+ */
+void
+pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
+    // Copy from the address of the by-value parameter, one byte long.
+    const void *source = &value;
+    pm_buffer_append(buffer, source, sizeof(uint8_t));
+}
+
+/**
+ * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
+ *
+ * The value is written seven bits at a time, least significant group first.
+ * Every byte except the last has its high bit set to signal that more bytes
+ * follow.
+ */
+void
+pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
+    uint32_t remaining = value;
+
+    // Emit continuation bytes while more than seven bits are left. The cast
+    // keeps the low eight bits, i.e. the low seven bits plus the marker bit.
+    for (; remaining >= 128; remaining >>= 7) {
+        pm_buffer_append_byte(buffer, (uint8_t) (remaining | 128));
+    }
+
+    // The final byte has its high bit clear.
+    pm_buffer_append_byte(buffer, (uint8_t) remaining);
+}
+
+/**
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
+ *
+ * The value is first zigzag-encoded (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...)
+ * so that values of small magnitude stay small, and is then written with
+ * pm_buffer_append_varuint().
+ *
+ * @param buffer The buffer to append to.
+ * @param value The signed value to append.
+ */
+void
+pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
+    // All ones for a negative value, all zeroes otherwise. Computed with a
+    // comparison because right-shifting a negative signed integer is
+    // implementation-defined in C.
+    uint32_t sign_mask = (value < 0) ? UINT32_MAX : 0;
+    uint32_t unsigned_int = ((uint32_t) value << 1) ^ sign_mask;
+    pm_buffer_append_varuint(buffer, unsigned_int);
+}
+
+/**
+ * Append a double to the buffer.
+ *
+ * The sizeof(double) bytes of the value's in-memory representation are copied
+ * as they are, with no byte-order conversion.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The double to append.
+ */
+void
+pm_buffer_append_double(pm_buffer_t *buffer, double value) {
+    const void *source = &value;
+    pm_buffer_append(buffer, source, sizeof(double));
+}
+
+/**
+ * Append a unicode codepoint to the buffer, encoded as one to four UTF-8
+ * bytes depending on its magnitude.
+ *
+ * Returns true if the codepoint was encoded, or false (appending nothing) if
+ * it is greater than 0x10FFFF. Surrogate values are not rejected here.
+ */
+bool
+pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value) {
+    uint8_t encoded[4];
+    size_t width;
+
+    if (value <= 0x7F) {
+        encoded[0] = (uint8_t) value;                           // 0xxxxxxx
+        width = 1;
+    } else if (value <= 0x7FF) {
+        encoded[0] = (uint8_t) (0xC0 | ((value >> 6) & 0x3F));  // 110xxxxx
+        encoded[1] = (uint8_t) (0x80 | (value & 0x3F));         // 10xxxxxx
+        width = 2;
+    } else if (value <= 0xFFFF) {
+        encoded[0] = (uint8_t) (0xE0 | ((value >> 12) & 0x3F)); // 1110xxxx
+        encoded[1] = (uint8_t) (0x80 | ((value >> 6) & 0x3F));  // 10xxxxxx
+        encoded[2] = (uint8_t) (0x80 | (value & 0x3F));         // 10xxxxxx
+        width = 3;
+    } else if (value <= 0x10FFFF) {
+        encoded[0] = (uint8_t) (0xF0 | ((value >> 18) & 0x3F)); // 11110xxx
+        encoded[1] = (uint8_t) (0x80 | ((value >> 12) & 0x3F)); // 10xxxxxx
+        encoded[2] = (uint8_t) (0x80 | ((value >> 6) & 0x3F));  // 10xxxxxx
+        encoded[3] = (uint8_t) (0x80 | (value & 0x3F));         // 10xxxxxx
+        width = 4;
+    } else {
+        // Beyond the last Unicode codepoint: nothing to encode.
+        return false;
+    }
+
+    pm_buffer_append_bytes(buffer, encoded, width);
+    return true;
+}
+
+/**
+ * Append a slice of source code to the buffer, escaping quotes, backslashes,
+ * control bytes, and bytes at or above 0x7F according to the given escaping
+ * style.
+ */
+void
+pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) {
+    const bool ruby = (escaping == PM_BUFFER_ESCAPING_RUBY);
+
+    for (size_t offset = 0; offset < length; offset++) {
+        const uint8_t current = source[offset];
+
+        switch (current) {
+            case '\a':
+            case '\v':
+                // Only the Ruby style has a short escape for these two bytes.
+                if (ruby) {
+                    pm_buffer_append_string(buffer, (current == '\a') ? "\\a" : "\\v", 2);
+                } else {
+                    pm_buffer_append_format(buffer, "\\u%04X", current);
+                }
+                break;
+            case '\b':
+                pm_buffer_append_string(buffer, "\\b", 2);
+                break;
+            case '\t':
+                pm_buffer_append_string(buffer, "\\t", 2);
+                break;
+            case '\n':
+                pm_buffer_append_string(buffer, "\\n", 2);
+                break;
+            case '\f':
+                pm_buffer_append_string(buffer, "\\f", 2);
+                break;
+            case '\r':
+                pm_buffer_append_string(buffer, "\\r", 2);
+                break;
+            case '"':
+                pm_buffer_append_string(buffer, "\\\"", 2);
+                break;
+            case '\\':
+                pm_buffer_append_string(buffer, "\\\\", 2);
+                break;
+            case '#':
+                // In the Ruby style, a '#' followed by '{', '@', or '$' gets a backslash.
+                if (ruby && offset + 1 < length) {
+                    const uint8_t following = source[offset + 1];
+                    if (following == '{' || following == '@' || following == '$') {
+                        pm_buffer_append_byte(buffer, '\\');
+                    }
+                }
+
+                pm_buffer_append_byte(buffer, '#');
+                break;
+            default: {
+                // Bytes in 0x20..0x7E pass through; the rest use a numeric escape.
+                const bool printable = (current >= 0x20 && current < 0x7F);
+                if (printable) {
+                    pm_buffer_append_byte(buffer, current);
+                } else if (ruby) {
+                    pm_buffer_append_format(buffer, "\\x%02X", current);
+                } else {
+                    pm_buffer_append_format(buffer, "\\u%04X", current);
+                }
+                break;
+            }
+        }
+    }
+}
+
+/**
+ * Prepend the given string to the buffer by growing it and shifting the
+ * existing bytes toward the end.
+ */
+void
+pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
+    const size_t previous = buffer->length;
+    if (!pm_buffer_append_length(buffer, length)) return;
+    memmove(buffer->value + length, buffer->value, previous);
+    memcpy(buffer->value, value, length);
+}
+
+/**
+ * Append the contents of the source buffer onto the destination buffer.
+ */
+void
+pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source) {
+    if (source->length == 0) return;
+
+    pm_buffer_append(destination, source->value, source->length);
+}
+
+/**
+ * Reset the buffer's length to 0 so that it can be reused. The allocated
+ * memory and the recorded capacity are left as they are.
+ */
+void
+pm_buffer_clear(pm_buffer_t *buffer) {
+    buffer->length = 0;
+}
+
+/**
+ * Drop trailing whitespace bytes from the buffer by shrinking its length.
+ */
+void
+pm_buffer_rstrip(pm_buffer_t *buffer) {
+    size_t remaining = buffer->length;
+    while (remaining > 0 && pm_char_is_whitespace((uint8_t) buffer->value[remaining - 1])) remaining--;
+    buffer->length = remaining;
+}
+
+/**
+ * Finds the index of the first occurrence of value, or SIZE_MAX if absent.
+ */
+size_t
+pm_buffer_index(const pm_buffer_t *buffer, char value) {
+    const char *match = memchr(buffer->value, value, buffer->length);
+    return (match != NULL) ? (size_t) (match - buffer->value) : SIZE_MAX;
+}
+
+/**
+ * Insert length bytes of value at the given index, moving later bytes toward the end.
+ */
+void
+pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) {
+    assert(index <= buffer->length);
+    if (index == buffer->length) {
+        pm_buffer_append_string(buffer, value, length);
+        return;
+    }
+    pm_buffer_append_zeroes(buffer, length);
+    char *gap = buffer->value + index;
+    memmove(gap + length, gap, buffer->length - length - index);
+    memcpy(gap, value, length);
+}
+
+/**
+ * Free the memory that buffer->value points to. The struct itself is not freed.
+ */
+void
+pm_buffer_free(pm_buffer_t *buffer) {
+    xfree(buffer->value);
+}
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
new file mode 100644
index 0000000000..cb80f8b3ce
--- /dev/null
+++ b/prism/util/pm_buffer.h
@@ -0,0 +1,236 @@
+/**
+ * @file pm_buffer.h
+ *
+ * A wrapper around a contiguous block of allocated memory.
+ */
+#ifndef PRISM_BUFFER_H
+#define PRISM_BUFFER_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_char.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * A pm_buffer_t is a simple memory buffer that stores data in a contiguous
+ * block of memory, tracking the bytes in use separately from the allocation.
+ */
+typedef struct {
+    /** The number of bytes of the buffer that are currently in use. */
+    size_t length;
+
+    /** The capacity of the buffer in bytes that has been allocated. */
+    size_t capacity;
+
+    /** A pointer to the start of the buffer. */
+    char *value;
+} pm_buffer_t;
+
+/**
+ * Return the size of the pm_buffer_t struct.
+ *
+ * @returns The size of the pm_buffer_t struct.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
+
+/**
+ * Initialize a pm_buffer_t with the given capacity.
+ *
+ * @param buffer The buffer to initialize.
+ * @param capacity The capacity of the buffer.
+ * @returns True if the buffer was initialized successfully, false otherwise.
+ */
+bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
+
+/**
+ * Initialize a pm_buffer_t with its default values.
+ *
+ * @param buffer The buffer to initialize.
+ * @returns True if the buffer was initialized successfully, false otherwise.
+ *
+ * \public \memberof pm_buffer_t
+ */
+PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
+
+/**
+ * Return the value of the buffer.
+ *
+ * @param buffer The buffer to get the value of.
+ * @returns The value of the buffer.
+ *
+ * \public \memberof pm_buffer_t
+ */
+PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer);
+
+/**
+ * Return the length of the buffer.
+ *
+ * @param buffer The buffer to get the length of.
+ * @returns The length of the buffer.
+ *
+ * \public \memberof pm_buffer_t
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer);
+
+/**
+ * Append the given amount of space as zeroes to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param length The amount of space to append and zero.
+ */
+void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
+
+/**
+ * Append a formatted string to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param format The format string to append.
+ * @param ... The arguments to the format string.
+ */
+void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
+
+/**
+ * Append a string to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The string to append.
+ * @param length The length of the string to append.
+ */
+void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/**
+ * Append a list of bytes to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The bytes to append.
+ * @param length The length of the bytes to append.
+ */
+void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
+
+/**
+ * Append a single byte to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The byte to append.
+ */
+void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
+
+/**
+ * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The integer to append.
+ */
+void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
+
+/**
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The integer to append.
+ */
+void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
+
+/**
+ * Append a double to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The double to append.
+ */
+void pm_buffer_append_double(pm_buffer_t *buffer, double value);
+
+/**
+ * Append a unicode codepoint to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The character to append.
+ * @returns True if the codepoint was valid and appended successfully, false
+ *   otherwise.
+ */
+bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
+
+/**
+ * The different types of escaping that can be performed by the buffer when
+ * appending a slice of Ruby source code.
+ */
+typedef enum {
+    PM_BUFFER_ESCAPING_RUBY, /**< Two-digit hex escapes; '#' is escaped before '{', '@', or '$'. */
+    PM_BUFFER_ESCAPING_JSON  /**< Four-digit unicode escapes; '#' is never escaped. */
+} pm_buffer_escaping_t;
+
+/**
+ * Append a slice of source code to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param source The source code to append.
+ * @param length The length of the source code to append.
+ * @param escaping The type of escaping to perform.
+ */
+void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
+
+/**
+ * Prepend the given string to the buffer.
+ *
+ * @param buffer The buffer to prepend to.
+ * @param value The string to prepend.
+ * @param length The length of the string to prepend.
+ */
+void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/**
+ * Concatenate one buffer onto another.
+ *
+ * @param destination The buffer to concatenate onto.
+ * @param source The buffer to concatenate.
+ */
+void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
+
+/**
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ *
+ * @param buffer The buffer to clear.
+ */
+void pm_buffer_clear(pm_buffer_t *buffer);
+
+/**
+ * Strip the whitespace from the end of the buffer.
+ *
+ * @param buffer The buffer to strip.
+ */
+void pm_buffer_rstrip(pm_buffer_t *buffer);
+
+/**
+ * Finds the index of the first occurrence of the given value in the buffer.
+ *
+ * @param buffer The buffer to check.
+ * @param value The value to check for.
+ * @returns The index of the first occurrence of the value in the buffer, or
+ *   SIZE_MAX if the value is not found.
+ */
+size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/**
+ * Insert the given string into the buffer at the given index.
+ *
+ * @param buffer The buffer to insert into.
+ * @param index The index to insert at.
+ * @param value The string to insert.
+ * @param length The length of the string to insert.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+/**
+ * Free the memory associated with the buffer.
+ *
+ * @param buffer The buffer to free.
+ *
+ * \public \memberof pm_buffer_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
+
+#endif
diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c
new file mode 100644
index 0000000000..a51dc11645
--- /dev/null
+++ b/prism/util/pm_char.c
@@ -0,0 +1,318 @@
+#include "prism/util/pm_char.h"
+
+#define PRISM_CHAR_BIT_WHITESPACE (1 << 0) // tab, newline, vertical tab, form feed, carriage return, space
+#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1) // the whitespace set above without newline
+#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) // ASCII letters A-Z and a-z
+
+#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0) // 0-1
+#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1) // 0-1 and underscore
+#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2) // 0-7
+#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3) // 0-7 and underscore
+#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4) // 0-9
+#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5) // 0-9 and underscore
+#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6) // 0-9, A-F, a-f
+#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7) // 0-9, A-F, a-f, and underscore
+
+static const uint8_t pm_byte_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F   (entries are ORed PRISM_CHAR_BIT_* flags)
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x: tab, newline, vertical tab, form feed, carriage return
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x: space
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
+    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x: A-O
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x: P-Z
+    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x: a-o
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x: p-z
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+static const uint8_t pm_number_table[256] = {
+    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F   (ORed PRISM_NUMBER_BIT_* flags)
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
+    0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x: digits 0-9
+    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x: A-F
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x: underscore (NUMBER bits only)
+    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x: a-f
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
+};
+
+/**
+ * Counts the leading bytes of the string whose pm_byte_table entry has any of
+ * the bits in kind set. Never examines more than length bytes.
+ */
+static inline size_t
+pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+    if (length <= 0) return 0;
+    const size_t limit = (size_t) length;
+    size_t count = 0;
+    for (; count < limit; count++) {
+        if ((pm_byte_table[string[count]] & kind) == 0) break;
+    }
+    return count;
+}
+
+/**
+ * Returns the number of bytes at the start of the string that are whitespace
+ * (tab, newline, vertical tab, form feed, carriage return, space), up to length.
+ */
+size_t
+pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
+}
+
+/**
+ * Returns the number of bytes at the start of the string that are whitespace,
+ * appending the location of every newline seen to the given newline list.
+ * Never examines more than length bytes.
+ */
+size_t
+pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
+    if (length <= 0) return 0;
+
+    const size_t limit = (size_t) length;
+    size_t count = 0;
+
+    for (; count < limit; count++) {
+        const uint8_t byte = string[count];
+        if ((pm_byte_table[byte] & PRISM_CHAR_BIT_WHITESPACE) == 0) break;
+
+        // Newlines count as whitespace, and their positions are recorded too.
+        if (byte == '\n') pm_newline_list_append(newline_list, string + count);
+    }
+
+    return count;
+}
+
+/**
+ * Returns the number of bytes at the start of the string that are inline
+ * whitespace (whitespace other than newline). Examines at most length bytes.
+ */
+size_t
+pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+/**
+ * Returns the number of bytes at the start of the string that are flagged as
+ * regexp options in pm_byte_table (ASCII letters). Examines at most length bytes.
+ */
+size_t
+pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
+}
+
+/**
+ * Returns true if the pm_byte_table entry for the byte has any kind bit set.
+ */
+static inline bool
+pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
+    return (pm_byte_table[b] & kind) != 0;
+}
+
+/**
+ * Returns true if the byte is a tab, newline, vertical tab, form feed, carriage return, or space.
+ */
+bool
+pm_char_is_whitespace(const uint8_t b) {
+    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
+}
+
+/**
+ * Returns true if the byte is a whitespace character other than newline.
+ */
+bool
+pm_char_is_inline_whitespace(const uint8_t b) {
+    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+/**
+ * Counts the leading bytes of the string whose pm_number_table entry has any
+ * of the bits in kind set. Never examines more than the given maximum number
+ * of bytes.
+ */
+static inline size_t
+pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+    if (length <= 0) return 0;
+    const size_t limit = (size_t) length;
+    size_t count = 0;
+    for (; count < limit; count++) {
+        if ((pm_number_table[string[count]] & kind) == 0) break;
+    }
+    return count;
+}
+
+/**
+ * Scan through the string and return the number of bytes at the start of the
+ * string whose pm_number_table entry matches the given kind. Never examines
+ * more than length bytes.
+ *
+ * Additionally, report the location of the last invalid underscore found in
+ * the scanned bytes through the invalid out parameter; it is not written otherwise.
+ */
+static inline size_t
+pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    const size_t limit = (size_t) length;
+    size_t count = 0;
+
+    // Tracks whether the most recently consumed byte was an underscore.
+    bool after_underscore = false;
+
+    for (; count < limit && (pm_number_table[string[count]] & kind) != 0; count++) {
+        const bool is_underscore = (string[count] == '_');
+
+        // An underscore directly following another underscore is invalid.
+        if (is_underscore && after_underscore) *invalid = string + count;
+        after_underscore = is_underscore;
+    }
+
+    // An underscore as the final consumed byte is invalid as well.
+    if (after_underscore) *invalid = string + count - 1;
+    return count;
+}
+
+/**
+ * Returns the number of characters at the start of the string that are binary
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ */
+size_t
+pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are octal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ */
+size_t
+pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
+}
+
+/**
+ * Returns the number of bytes at the start of the string that are decimal
+ * digits (0-9, no underscores). Examines at most length bytes.
+ */
+size_t
+pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ */
+size_t
+pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
+}
+
+/**
+ * Returns the number of bytes at the start of the string that are
+ * hexadecimal digits (0-9, A-F, a-f, no underscores). Examines at most length
+ * bytes.
+ */
+size_t
+pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits or underscores. Disallows searching past the given maximum
+ * number of characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ */
+size_t
+pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
+}
+
+/**
+ * Returns true if the pm_number_table entry for the byte has any kind bit set.
+ */
+static inline bool
+pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
+    return (pm_number_table[b] & kind) != 0;
+}
+
+/**
+ * Returns true if the given byte is a binary digit ('0' or '1').
+ */
+bool
+pm_char_is_binary_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
+}
+
+/**
+ * Returns true if the given byte is an octal digit ('0' through '7').
+ */
+bool
+pm_char_is_octal_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
+}
+
+/**
+ * Returns true if the given byte is a decimal digit ('0' through '9').
+ */
+bool
+pm_char_is_decimal_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+/**
+ * Returns true if the given byte is a hexadecimal digit (0-9, A-F, or a-f).
+ */
+bool
+pm_char_is_hexadecimal_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+#undef PRISM_CHAR_BIT_WHITESPACE
+#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
+#undef PRISM_CHAR_BIT_REGEXP_OPTION
+
+#undef PRISM_NUMBER_BIT_BINARY_DIGIT
+#undef PRISM_NUMBER_BIT_BINARY_NUMBER
+#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
+#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
+#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
+#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
+#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
+#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
new file mode 100644
index 0000000000..deeafd6321
--- /dev/null
+++ b/prism/util/pm_char.h
@@ -0,0 +1,204 @@
+/**
+ * @file pm_char.h
+ *
+ * Functions for working with characters and strings.
+ */
+#ifndef PRISM_CHAR_H
+#define PRISM_CHAR_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_newline_list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * whitespace. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are
+ *     whitespace.
+ */
+size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * whitespace while also tracking the location of each newline. Disallows
+ * searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param newline_list The list of newlines to populate.
+ * @return The number of characters at the start of the string that are
+ *     whitespace.
+ */
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
+
+/**
+ * Returns the number of characters at the start of the string that are inline
+ * whitespace. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are inline
+ *     whitespace.
+ */
+size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are decimal
+ *     digits.
+ */
+size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are
+ *     hexadecimal digits.
+ */
+size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are octal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the location of the last invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are octal
+ *     digits or underscores.
+ */
+size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the location of the last invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are decimal
+ *     digits or underscores.
+ */
+size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits or underscores. Disallows searching past the given maximum
+ * number of characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the location of the last invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are
+ *     hexadecimal digits or underscores.
+ */
+size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns the number of characters at the start of the string that are regexp
+ * options. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are regexp
+ *     options.
+ */
+size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are binary
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to point at
+ * the last invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the location of the last invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are binary
+ *     digits or underscores.
+ */
+size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns true if the given character is a whitespace character.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a whitespace character.
+ */
+bool pm_char_is_whitespace(const uint8_t b);
+
+/**
+ * Returns true if the given character is an inline whitespace character.
+ *
+ * @param b The character to check.
+ * @return True if the given character is an inline whitespace character.
+ */
+bool pm_char_is_inline_whitespace(const uint8_t b);
+
+/**
+ * Returns true if the given character is a binary digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a binary digit.
+ */
+bool pm_char_is_binary_digit(const uint8_t b);
+
+/**
+ * Returns true if the given character is an octal digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is an octal digit.
+ */
+bool pm_char_is_octal_digit(const uint8_t b);
+
+/**
+ * Returns true if the given character is a decimal digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a decimal digit.
+ */
+bool pm_char_is_decimal_digit(const uint8_t b);
+
+/**
+ * Returns true if the given character is a hexadecimal digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a hexadecimal digit.
+ */
+bool pm_char_is_hexadecimal_digit(const uint8_t b);
+
+#endif
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
new file mode 100644
index 0000000000..922ce6a18c
--- /dev/null
+++ b/prism/util/pm_constant_pool.c
@@ -0,0 +1,342 @@
+#include "prism/util/pm_constant_pool.h"
+
+/**
+ * Put a list of constant ids into its empty state: no storage, zero size, and
+ * zero capacity. Nothing is allocated.
+ */
+void
+pm_constant_id_list_init(pm_constant_id_list_t *list) {
+    const pm_constant_id_list_t empty = { .size = 0, .capacity = 0, .ids = NULL };
+    *list = empty;
+}
+
+/**
+ * Initialize a list of constant ids with room for `capacity` entries. Nothing
+ * is allocated when capacity is zero; a failed allocation aborts the process.
+ */
+void
+pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
+    pm_constant_id_t *ids = NULL;
+    if (capacity != 0) {
+        ids = xcalloc(capacity, sizeof(pm_constant_id_t));
+        if (ids == NULL) abort();
+    }
+    list->ids = ids;
+    list->size = 0;
+    list->capacity = capacity;
+}
+
+/**
+ * Append an id, growing when full. On failure returns false, list unchanged.
+ */
+bool
+pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
+    if (list->size >= list->capacity) {
+        size_t capacity = list->capacity == 0 ? 8 : list->capacity * 2;
+        void *ids = xrealloc(list->ids, sizeof(pm_constant_id_t) * capacity);
+        if (ids == NULL) return false;
+        list->ids = (pm_constant_id_t *) ids;
+        list->capacity = capacity;
+    }
+    list->ids[list->size++] = id;
+    return true;
+}
+
+/**
+ * Place a constant id in the (currently unset) slot at the given index.
+ */
+void
+pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
+    assert(index < list->capacity);
+    assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
+
+    list->size += 1;
+    list->ids[index] = id;
+}
+
+/**
+ * Scan the ids stored in the list and report whether any of them equals the
+ * given constant id.
+ */
+bool
+pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
+    size_t index = 0;
+    while (index < list->size && list->ids[index] != id) index++;
+    return index < list->size;
+}
+
+/**
+ * Release the id array held by the list, if there is one. The list struct
+ * itself is not freed and its fields are left as they were.
+ */
+void
+pm_constant_id_list_free(pm_constant_id_list_t *list) {
+    if (list->ids == NULL) return;
+    xfree(list->ids);
+}
+
+/**
+ * Hash a byte string with djb2: start from 5381 and, for every byte, multiply
+ * the running value by 33 and add the byte, wrapping modulo 2^32. Chosen for
+ * simplicity and speed.
+ */
+static inline uint32_t
+pm_constant_pool_hash(const uint8_t *start, size_t length) {
+    uint32_t hash = 5381;
+
+    for (const uint8_t *cursor = start; length > 0; length--, cursor++) {
+        hash = hash * 33u + *cursor;
+    }
+
+    return hash;
+}
+
+/**
+ * Round v up to the nearest power of two, treating zero as 1. Based on
+ * https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+ * Inputs above 2^31 wrap around to 0.
+ */
+static uint32_t
+next_power_of_two(uint32_t v) {
+    // Zero is special-cased so the decrement below cannot wrap around.
+    if (v == 0) return 1;
+
+    // Smear the highest set bit of v - 1 into every lower position, then add
+    // one to land on the power of two.
+    uint32_t smeared = v - 1;
+    for (unsigned int shift = 1; shift <= 16; shift *= 2) {
+        smeared |= smeared >> shift;
+    }
+
+    return smeared + 1;
+}
+
+#ifndef NDEBUG
+// True when at most one bit of size is set (so zero also passes).
+static bool is_power_of_two(uint32_t size) {
+    return !(size & (size - 1));
+}
+#endif
+
+/**
+ * Grow a constant pool to twice its capacity, rehashing every occupied bucket.
+ * Returns false if the doubled capacity overflows or the allocation fails.
+ */
+static inline bool
+pm_constant_pool_resize(pm_constant_pool_t *pool) {
+    assert(is_power_of_two(pool->capacity));
+
+    uint32_t next_capacity = pool->capacity * 2;
+    if (next_capacity < pool->capacity) return false;
+
+    const uint32_t mask = next_capacity - 1;
+    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
+
+    void *next = xcalloc(next_capacity, element_size);
+    if (next == NULL) return false;
+
+    pm_constant_pool_bucket_t *next_buckets = next;
+    pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
+
+    // Rehash every occupied bucket of the current pool into the next one.
+    for (uint32_t index = 0; index < pool->capacity; index++) {
+        pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
+
+        // If an id is set on this constant, then we know we have content here.
+        // In this case we need to insert it into the next constant pool.
+        if (bucket->id != PM_CONSTANT_ID_UNSET) {
+            uint32_t next_index = bucket->hash & mask;
+
+            // This implements linear scanning to find the next available slot
+            // in case this index is already taken. We don't need to bother
+            // comparing the values since the entries being moved are distinct.
+            while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
+                next_index = (next_index + 1) & mask;
+            }
+
+            // Here we copy over the entire bucket, which includes the id so
+            // that they are consistent between resizes.
+            next_buckets[next_index] = *bucket;
+        }
+    }
+
+    // The constants are stable with respect to hash table resizes.
+    memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
+
+    // pool->constants and pool->buckets are allocated out of the same chunk
+    // of memory, with the buckets coming first.
+    xfree(pool->buckets);
+    pool->constants = next_constants;
+    pool->buckets = next_buckets;
+    pool->capacity = next_capacity;
+    return true;
+}
+
+/**
+ * Initialize a new constant pool. The capacity is rounded up to a power of two
+ * first; returns false if it is 2^31 or more or if the allocation fails.
+ */
+bool
+pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
+    if (capacity > (UINT32_MAX / 2)) return false;
+
+    const uint32_t rounded = next_power_of_two(capacity);
+    void *chunk = xcalloc(rounded, sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t));
+    if (chunk == NULL) return false;
+
+    // One chunk holds both arrays: the buckets first, then the constants.
+    pool->size = 0;
+    pool->capacity = rounded;
+    pool->buckets = chunk;
+    pool->constants = (void *) (((char *) chunk) + rounded * sizeof(pm_constant_pool_bucket_t));
+    return true;
+}
+
+/**
+ * Return a pointer to the constant for the given (1-based) constant id.
+ */
+pm_constant_t *
+pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
+    assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
+    return pool->constants + (constant_id - 1);
+}
+
+/**
+ * Look up a string in a constant pool by probing linearly from its hash slot.
+ * Returns the id of the matching constant, or PM_CONSTANT_ID_UNSET (0) if an
+ * empty bucket is reached first.
+ */
+pm_constant_id_t
+pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    assert(is_power_of_two(pool->capacity));
+    const uint32_t mask = pool->capacity - 1;
+    const uint32_t hash = pm_constant_pool_hash(start, length);
+
+    for (uint32_t index = hash & mask; ; index = (index + 1) & mask) {
+        const pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
+        // An unset id marks an empty bucket, which ends the probe sequence.
+        if (bucket->id == PM_CONSTANT_ID_UNSET) break;
+
+        // Check the lengths first so memcmp only sees equal-sized strings.
+        const pm_constant_t *constant = &pool->constants[bucket->id - 1];
+        if (constant->length != length) continue;
+        if (memcmp(constant->start, start, length) == 0) return bucket->id;
+    }
+
+    return PM_CONSTANT_ID_UNSET;
+}
+
+/**
+ * Insert a constant into a constant pool and return its id (the existing id if
+ * equal content is already stored), or PM_CONSTANT_ID_UNSET if a resize fails.
+ */
+static inline pm_constant_id_t
+pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
+    if (pool->size >= (pool->capacity / 4 * 3)) { // grow once about 3/4 full
+        if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
+    }
+
+    assert(is_power_of_two(pool->capacity));
+    const uint32_t mask = pool->capacity - 1;
+
+    uint32_t hash = pm_constant_pool_hash(start, length);
+    uint32_t index = hash & mask;
+    pm_constant_pool_bucket_t *bucket;
+
+    while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
+        // If there is a collision, then we need to check if the content is the
+        // same as the content we are trying to insert. If it is, then we can
+        // return the id of the existing constant.
+        pm_constant_t *constant = &pool->constants[bucket->id - 1];
+
+        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+            // Since we have found a match, we need to check if this is
+            // attempting to insert a shared or an owned constant. We want to
+            // prefer shared constants since they don't require allocations.
+            if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+                // If we're attempting to insert an owned constant and we have
+                // an existing constant, then either way we don't want the given
+                // memory. Either it's duplicated with the existing constant or
+                // it's not necessary because we have a shared version.
+                xfree((void *) start);
+            } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+                // If we're attempting to insert a shared constant and the
+                // existing constant is owned, then we can free the owned
+                // constant and replace it with the shared constant.
+                xfree((void *) constant->start);
+                constant->start = start;
+                bucket->type = (unsigned int) (type & 0x3);
+            }
+
+            return bucket->id;
+        }
+
+        index = (index + 1) & mask;
+    }
+
+    // IDs start at 1, since the value 0 denotes a non-existent constant.
+    uint32_t id = ++pool->size;
+    assert(pool->size < ((uint32_t) (1 << 30)));
+
+    *bucket = (pm_constant_pool_bucket_t) {
+        .id = (unsigned int) (id & 0x3fffffff),
+        .type = (unsigned int) (type & 0x3),
+        .hash = hash
+    };
+
+    pool->constants[id - 1] = (pm_constant_t) {
+        .start = start,
+        .length = length,
+    };
+
+    return id;
+}
+
+/**
+ * Insert a source-backed constant. Returns its id, or 0 if a resize fails.
+ */
+pm_constant_id_t
+pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    const pm_constant_pool_bucket_type_t type = PM_CONSTANT_POOL_BUCKET_DEFAULT;
+    return pm_constant_pool_insert(pool, start, length, type);
+}
+
+/**
+ * Insert a constant whose memory is handed over to the constant pool. Returns
+ * the id of the constant, or PM_CONSTANT_ID_UNSET if a needed resize fails.
+ */
+pm_constant_id_t
+pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
+    const pm_constant_pool_bucket_type_t type = PM_CONSTANT_POOL_BUCKET_OWNED;
+    return pm_constant_pool_insert(pool, start, length, type);
+}
+
+/**
+ * Insert a constant whose bytes live in constant memory. Returns the id of the
+ * constant, or PM_CONSTANT_ID_UNSET if a needed resize fails.
+ */
+pm_constant_id_t
+pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    const pm_constant_pool_bucket_type_t type = PM_CONSTANT_POOL_BUCKET_CONSTANT;
+    return pm_constant_pool_insert(pool, start, length, type);
+}
+
+/**
+ * Free the memory associated with a constant pool: every string held by an
+ * owned bucket, then the allocation that pool->buckets points to.
+ */
+void
+pm_constant_pool_free(pm_constant_pool_t *pool) {
+    for (uint32_t index = 0; index < pool->capacity; index++) {
+        const pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
+
+        // Skip empty buckets and buckets whose strings the pool doesn't own.
+        if (bucket->id == PM_CONSTANT_ID_UNSET) continue;
+        if (bucket->type != PM_CONSTANT_POOL_BUCKET_OWNED) continue;
+
+        xfree((void *) pool->constants[bucket->id - 1].start);
+    }
+
+    // Only the buckets pointer is freed; it is the start of the allocation.
+    xfree(pool->buckets);
+}
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
new file mode 100644
index 0000000000..6df23f8f50
--- /dev/null
+++ b/prism/util/pm_constant_pool.h
@@ -0,0 +1,218 @@
+/**
+ * @file pm_constant_pool.h
+ *
+ * A data structure that stores a set of strings.
+ *
+ * Each string is assigned a unique id, which can be used to compare strings for
+ * equality. This comparison ends up being much faster than strcmp, since it
+ * only requires a single integer comparison.
+ */
+#ifndef PRISM_CONSTANT_POOL_H
+#define PRISM_CONSTANT_POOL_H
+
+#include "prism/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * When we allocate constants into the pool, we reserve 0 to mean that the slot
+ * is not yet filled. This constant is reused in other places to indicate the
+ * lack of a constant id.
+ */
+#define PM_CONSTANT_ID_UNSET 0
+
+/**
+ * A constant id is a unique identifier for a constant in the constant pool.
+ */
+typedef uint32_t pm_constant_id_t;
+
+/**
+ * A list of constant IDs. Usually used to represent a set of locals.
+ */
+typedef struct {
+    /** The number of constant ids in the list. */
+    size_t size;
+
+    /** The number of constant ids that have been allocated in the list. */
+    size_t capacity;
+
+    /** The constant ids in the list. */
+    pm_constant_id_t *ids;
+} pm_constant_id_list_t;
+
+/**
+ * Initialize a list of constant ids.
+ *
+ * @param list The list to initialize.
+ */
+void pm_constant_id_list_init(pm_constant_id_list_t *list);
+
+/**
+ * Initialize a list of constant ids with a given capacity.
+ *
+ * @param list The list to initialize.
+ * @param capacity The initial capacity of the list.
+ */
+void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
+
+/**
+ * Append a constant id to a list of constant ids. Returns false if any
+ * potential reallocations fail.
+ *
+ * @param list The list to append to.
+ * @param id The id to append.
+ * @return Whether the append succeeded.
+ */
+bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
+
+/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ *
+ * @param list The list to insert into.
+ * @param index The index at which to insert.
+ * @param id The id to insert.
+ */
+void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
+
+/**
+ * Checks if the current constant id list includes the given constant id.
+ *
+ * @param list The list to check.
+ * @param id The id to check for.
+ * @return Whether the list includes the given id.
+ */
+bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
+
+/**
+ * Free the memory associated with a list of constant ids.
+ *
+ * @param list The list to free.
+ */
+void pm_constant_id_list_free(pm_constant_id_list_t *list);
+
+/**
+ * The type of bucket in the constant pool hash map. This determines how the
+ * bucket should be freed.
+ */
+typedef unsigned int pm_constant_pool_bucket_type_t;
+
+/** By default, each constant is a slice of the source. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
+
+/** An owned constant is one for which memory has been allocated. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
+
+/** A constant constant is known at compile time. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
+
+/** A bucket in the hash map. */
+typedef struct {
+    /** The incremental ID used for indexing back into the pool; 0 if unset. */
+    unsigned int id: 30;
+
+    /** The type of the bucket, which determines how to free it. */
+    pm_constant_pool_bucket_type_t type: 2;
+
+    /** The hash of the constant's bytes, reused when the pool is resized. */
+    uint32_t hash;
+} pm_constant_pool_bucket_t;
+
+/** A constant in the pool which effectively stores a string. */
+typedef struct {
+    /** A pointer to the start of the string. */
+    const uint8_t *start;
+
+    /** The length of the string. */
+    size_t length;
+} pm_constant_t;
+
+/** The overall constant pool, which stores constants found while parsing. */
+typedef struct {
+    /** The buckets in the hash map. */
+    pm_constant_pool_bucket_t *buckets;
+
+    /** The constants that the buckets refer to, stored at index id - 1. */
+    pm_constant_t *constants;
+
+    /** The number of constants that have been inserted into the pool. */
+    uint32_t size;
+
+    /** The number of buckets that have been allocated in the hash map. */
+    uint32_t capacity;
+} pm_constant_pool_t;
+
+/**
+ * Initialize a new constant pool with a given capacity.
+ *
+ * @param pool The pool to initialize.
+ * @param capacity The initial capacity of the pool.
+ * @return Whether the initialization succeeded.
+ */
+bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
+
+/**
+ * Return a pointer to the constant indicated by the given constant id.
+ *
+ * @param pool The pool to get the constant from.
+ * @param constant_id The id of the constant to get.
+ * @return A pointer to the constant.
+ */
+pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
+
+/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ *
+ * @param pool The pool to find the constant in.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
+ * Insert a constant into a constant pool that is a slice of a source string.
+ * Returns the id of the constant, or 0 if any potential calls to resize fail.
+ *
+ * @param pool The pool to insert the constant into.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
+ * Insert a constant into a constant pool from memory that is now owned by the
+ * constant pool. Returns the id of the constant, or 0 if any potential calls to
+ * resize fail.
+ *
+ * @param pool The pool to insert the constant into.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
+
+/**
+ * Insert a constant into a constant pool from memory that is constant. Returns
+ * the id of the constant, or 0 if any potential calls to resize fail.
+ *
+ * @param pool The pool to insert the constant into.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
+ * Free the memory associated with a constant pool.
+ *
+ * @param pool The pool to free.
+ */
+void pm_constant_pool_free(pm_constant_pool_t *pool);
+
+#endif
diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c
new file mode 100644
index 0000000000..4170ecc58d
--- /dev/null
+++ b/prism/util/pm_integer.c
@@ -0,0 +1,670 @@
+#include "prism/util/pm_integer.h"
+
+/**
+ * Pull out the length and values from the integer, whichever form they are
+ * stored in. Expands to a bare if/else, so call sites omit the semicolon.
+ */
+#define INTEGER_EXTRACT(integer, length_variable, values_variable) \
+    if ((integer)->values == NULL) { \
+        length_variable = 1; \
+        values_variable = &(integer)->value; \
+    } else { \
+        length_variable = (integer)->length; \
+        values_variable = (integer)->values; \
+    }
+
+/**
+ * Add two non-negative integers whose digits are in the given base. Stores a
+ * newly allocated, unnormalized result, or nothing if allocation fails.
+ */
+static void
+big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
+    size_t left_length;
+    uint32_t *left_values;
+    INTEGER_EXTRACT(left, left_length, left_values)
+
+    size_t right_length;
+    uint32_t *right_values;
+    INTEGER_EXTRACT(right, right_length, right_values)
+
+    const size_t longest = left_length > right_length ? left_length : right_length;
+    uint32_t *digits = (uint32_t *) xmalloc(sizeof(uint32_t) * (longest + 1));
+    if (digits == NULL) return;
+
+    uint64_t carry = 0;
+    for (size_t index = 0; index < longest; index++) {
+        uint64_t sum = carry;
+        if (index < left_length) sum += left_values[index];
+        if (index < right_length) sum += right_values[index];
+        digits[index] = (uint32_t) (sum % base);
+        carry = sum / base;
+    }
+
+    size_t length = longest;
+    if (carry > 0) digits[length++] = (uint32_t) carry;
+
+    *destination = (pm_integer_t) { length, digits, 0, false };
+}
+
+/**
+ * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
+ * base. Assume a, b, c, a - b - c all to be positive. The result has values
+ * allocated and is not normalized; it is a zeroed integer if allocation fails.
+ */
+static void
+big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) {
+    size_t a_length;
+    uint32_t *a_values;
+    INTEGER_EXTRACT(a, a_length, a_values)
+
+    size_t b_length;
+    uint32_t *b_values;
+    INTEGER_EXTRACT(b, b_length, b_values)
+
+    size_t c_length;
+    uint32_t *c_values;
+    INTEGER_EXTRACT(c, c_length, c_values)
+
+    uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * a_length);
+    if (values == NULL) {
+        *destination = (pm_integer_t) { 0 };
+        return;
+    }
+
+    int64_t carry = 0;
+    for (size_t index = 0; index < a_length; index++) {
+        int64_t sub = carry + a_values[index];
+        if (index < b_length) sub -= b_values[index];
+        if (index < c_length) sub -= c_values[index];
+        if (sub >= 0) {
+            values[index] = (uint32_t) sub;
+            carry = 0;
+        } else {
+            sub += 2 * (int64_t) base;
+            values[index] = (uint32_t) ((uint64_t) sub % base);
+            carry = sub / (int64_t) base - 2;
+        }
+    }
+
+    while (a_length > 1 && values[a_length - 1] == 0) a_length--;
+    *destination = (pm_integer_t) { a_length, values, 0, false };
+}
+
+/**
+ * Multiply two positive integers with the given base using karatsuba algorithm,
+ * with simpler paths for short or very uneven operands. Not normalized.
+ */
+static void
+karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
+    size_t left_length;
+    uint32_t *left_values;
+    INTEGER_EXTRACT(left, left_length, left_values)
+
+    size_t right_length;
+    uint32_t *right_values;
+    INTEGER_EXTRACT(right, right_length, right_values)
+
+    if (left_length > right_length) { // make left the shorter operand
+        size_t temporary_length = left_length;
+        left_length = right_length;
+        right_length = temporary_length;
+
+        uint32_t *temporary_values = left_values;
+        left_values = right_values;
+        right_values = temporary_values;
+    }
+
+    if (left_length <= 10) { // short operand: digit-by-digit long multiplication
+        size_t length = left_length + right_length;
+        uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+        if (values == NULL) return;
+
+        for (size_t left_index = 0; left_index < left_length; left_index++) {
+            uint32_t carry = 0;
+            for (size_t right_index = 0; right_index < right_length; right_index++) {
+                uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry;
+                values[left_index + right_index] = (uint32_t) (product % base);
+                carry = (uint32_t) (product / base);
+            }
+            values[left_index + right_length] = carry;
+        }
+
+        while (length > 1 && values[length - 1] == 0) length--;
+        *destination = (pm_integer_t) { length, values, 0, false };
+        return;
+    }
+
+    if (left_length * 2 <= right_length) { // uneven lengths: multiply right in left-sized slices
+        uint32_t *values = (uint32_t *) xcalloc(left_length + right_length, sizeof(uint32_t));
+
+        for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) {
+            size_t end_offset = start_offset + left_length;
+            if (end_offset > right_length) end_offset = right_length;
+
+            pm_integer_t sliced_left = {
+                .length = left_length,
+                .values = left_values,
+                .value = 0,
+                .negative = false
+            };
+
+            pm_integer_t sliced_right = {
+                .length = end_offset - start_offset,
+                .values = right_values + start_offset,
+                .value = 0,
+                .negative = false
+            };
+
+            pm_integer_t product;
+            karatsuba_multiply(&product, &sliced_left, &sliced_right, base);
+
+            uint32_t carry = 0;
+            for (size_t index = 0; index < product.length; index++) {
+                uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry;
+                values[start_offset + index] = (uint32_t) (sum % base);
+                carry = (uint32_t) (sum / base);
+            }
+
+            if (carry > 0) values[start_offset + product.length] += carry;
+            pm_integer_free(&product);
+        }
+
+        *destination = (pm_integer_t) { left_length + right_length, values, 0, false };
+        return;
+    }
+
+    size_t half = left_length / 2;
+    pm_integer_t x0 = { half, left_values, 0, false };
+    pm_integer_t x1 = { left_length - half, left_values + half, 0, false };
+    pm_integer_t y0 = { half, right_values, 0, false };
+    pm_integer_t y1 = { right_length - half, right_values + half, 0, false };
+
+    pm_integer_t z0 = { 0 };
+    karatsuba_multiply(&z0, &x0, &y0, base);
+
+    pm_integer_t z2 = { 0 };
+    karatsuba_multiply(&z2, &x1, &y1, base);
+
+    // For simplicity to avoid considering negative values,
+    // use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm.
+    pm_integer_t x01 = { 0 };
+    big_add(&x01, &x0, &x1, base);
+
+    pm_integer_t y01 = { 0 };
+    big_add(&y01, &y0, &y1, base);
+
+    pm_integer_t xy = { 0 };
+    karatsuba_multiply(&xy, &x01, &y01, base);
+
+    pm_integer_t z1;
+    big_sub2(&z1, &xy, &z0, &z2, base);
+
+    size_t length = left_length + right_length;
+    uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t));
+
+    assert(z0.values != NULL);
+    memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
+
+    assert(z2.values != NULL);
+    memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
+
+    uint32_t carry = 0;
+    for(size_t index = 0; index < z1.length; index++) { // add z1 in, shifted up by half digits
+        uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index];
+        values[index + half] = (uint32_t) (sum % base);
+        carry = (uint32_t) (sum / base);
+    }
+
+    for(size_t index = half + z1.length; carry > 0; index++) { // ripple the leftover carry upward
+        uint64_t sum = (uint64_t) carry + values[index];
+        values[index] = (uint32_t) (sum % base);
+        carry = (uint32_t) (sum / base);
+    }
+
+    while (length > 1 && values[length - 1] == 0) length--;
+    pm_integer_free(&z0);
+    pm_integer_free(&z1);
+    pm_integer_free(&z2);
+    pm_integer_free(&x01);
+    pm_integer_free(&y01);
+    pm_integer_free(&xy);
+
+    *destination = (pm_integer_t) { length, values, 0, false };
+}
+
+/**
+ * The values of a hexadecimal digit, where the index is the ASCII character.
+ * Note that there's an odd exception here where _ is mapped to 0. This is
+ * because it's possible for us to end up trying to parse a number that has
+ * already had an error attached to it, and we want to provide _something_ to
+ * the user.
+ */
+static const int8_t pm_integer_parse_digit_values[256] = {
+//   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0, // 5x
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx
+};
+
+/**
+ * Look up the numeric value of an ASCII digit character in the table of digit
+ * values, asserting that the character is one the table recognizes.
+ */
+static uint8_t
+pm_integer_parse_digit(const uint8_t character) {
+    const int8_t value = pm_integer_parse_digit_values[character];
+    assert(value != -1 && "invalid digit");
+    return (uint8_t) value;
+}
+
+/**
+ * Fill a pm_integer_t from a uint64_t, splitting it into digits of the given
+ * base (least significant first). The pm_integer_t must already be zeroed: only
+ * `value`, or `length` and `values`, are written. Nothing is written if the
+ * digit array cannot be allocated.
+ */
+static void
+pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) {
+    // A single digit is stored inline, with no allocation.
+    if (value < base) {
+        integer->value = (uint32_t) value;
+        return;
+    }
+
+    // Count the digits that value needs in this base.
+    size_t length = 0;
+    for (uint64_t remaining = value; remaining > 0; remaining /= base) length++;
+
+    uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length);
+    if (values == NULL) return;
+
+    // Peel the digits off from the least significant end.
+    for (size_t index = 0; index < length; index++, value /= base) {
+        values[index] = (uint32_t) (value % base);
+    }
+
+    integer->values = values;
+    integer->length = length;
+}
+
+/**
+ * Normalize pm_integer_t.
+ * Zero digits at the most significant end are dropped. If a single digit
+ * remains, the digit array is freed and the digit is stored inline in `value`
+ * with values = NULL and length = 0; a zero result is never negative.
+ */
+static void
+pm_integer_normalize(pm_integer_t *integer) {
+    // Integers already stored inline have nothing to trim.
+    if (integer->values == NULL) return;
+
+    // Drop zero digits from the most significant end.
+    size_t length = integer->length;
+    while (length > 1 && integer->values[length - 1] == 0) length--;
+    integer->length = length;
+
+    if (length > 1) return;
+
+    // Collapse the single remaining digit into the inline representation.
+    const uint32_t value = integer->values[0];
+    const bool negative = integer->negative && value != 0;
+
+    pm_integer_free(integer);
+    *integer = (pm_integer_t) { .values = NULL, .value = value, .length = 0, .negative = negative };
+}
+
+/**
+ * Convert the digits of source from base_from to base_to into destination.
+ * In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9.
+ */
+static void
+pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) {
+    size_t source_length;
+    const uint32_t *source_values;
+    INTEGER_EXTRACT(source, source_length, source_values)
+
+    size_t bigints_length = (source_length + 1) / 2;
+    assert(bigints_length > 0);
+
+    pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t));
+    if (bigints == NULL) return;
+
+    for (size_t index = 0; index < source_length; index += 2) { // two source digits per piece
+        uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0);
+        pm_integer_from_uint64(&bigints[index / 2], value, base_to);
+    }
+
+    pm_integer_t base = { 0 };
+    pm_integer_from_uint64(&base, base_from, base_to);
+
+    while (bigints_length > 1) { // each pass halves the number of pieces
+        pm_integer_t next_base;
+        karatsuba_multiply(&next_base, &base, &base, base_to);
+
+        pm_integer_free(&base);
+        base = next_base; // base is now the square of its previous value
+
+        size_t next_length = (bigints_length + 1) / 2;
+        pm_integer_t *next_bigints = (pm_integer_t *) xcalloc(next_length, sizeof(pm_integer_t));
+
+        for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) {
+            if (bigints_index + 1 == bigints_length) {
+                next_bigints[bigints_index / 2] = bigints[bigints_index]; // unpaired piece moves over as-is
+            } else {
+                pm_integer_t multiplied = { 0 };
+                karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to);
+
+                big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to);
+                pm_integer_free(&bigints[bigints_index]);
+                pm_integer_free(&bigints[bigints_index + 1]);
+                pm_integer_free(&multiplied);
+            }
+        }
+
+        xfree(bigints);
+        bigints = next_bigints;
+        bigints_length = next_length;
+    }
+
+    *destination = bigints[0];
+    destination->negative = source->negative;
+    pm_integer_normalize(destination);
+
+    xfree(bigints);
+    pm_integer_free(&base);
+}
+
+#undef INTEGER_EXTRACT
+
+/**
+ * Build a pm_integer_t from digits whose base is a power of two. The digits
+ * array holds digit values (not characters), most-significant digit first.
+ */
+static void
+pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) {
+    // Number of bits that a single digit occupies.
+    size_t width = 1;
+    while (base > (uint32_t) (1 << width)) width++;
+
+    size_t length = (digits_length * width + 31) / 32;
+    uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+
+    // Walk from the least-significant digit upwards, OR-ing each digit into
+    // the 32-bit word(s) that it lands in.
+    for (size_t position = 0; position < digits_length; position++) {
+        uint32_t digit = digits[digits_length - position - 1];
+
+        size_t offset = width * position;
+        size_t word = offset / 32;
+        size_t shift = offset % 32;
+
+        values[word] |= digit << shift;
+
+        // A digit that straddles a word boundary spills its high bits into the
+        // following word.
+        if (shift + width > 32) values[word + 1] |= digit >> (32 - shift);
+    }
+
+    while (length > 1 && values[length - 1] == 0) length--;
+
+    *integer = (pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false };
+    pm_integer_normalize(integer);
+}
+
+/**
+ * Build a pm_integer_t from decimal digit values, most-significant digit
+ * first. The digits are grouped into base 10**9 words which are then converted
+ * to base 1<<32.
+ */
+static void
+pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
+    const size_t batch = 9;
+    size_t length = (digits_length + batch - 1) / batch;
+
+    uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+    uint32_t accumulator = 0;
+
+    // `remaining` counts the digits to the right of the current one. Whenever
+    // it is a multiple of the batch size a full word has been accumulated.
+    size_t remaining = digits_length;
+    for (const uint8_t *digit = digits; remaining > 0; digit++) {
+        accumulator = accumulator * 10 + *digit;
+        remaining--;
+
+        if (remaining % batch == 0) {
+            values[remaining / batch] = accumulator;
+            accumulator = 0;
+        }
+    }
+
+    // Convert base from 10**9 to 1<<32. The temporary integer only borrows the
+    // words, so they are released here once the conversion is done.
+    pm_integer_t decimal = { .length = length, .values = values, .value = 0, .negative = false };
+    pm_integer_convert_base(integer, &decimal, 1000000000, ((uint64_t) 1 << 32));
+    xfree(values);
+}
+
+/**
+ * Parse an integer that does not fit into a uint32_t. The characters between
+ * start and end are turned into digit values (underscores are skipped) and
+ * handed to the parser that matches the multiplier.
+ */
+static void
+pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
+    // One slot per input character is always enough, since underscores only
+    // ever reduce the number of digits.
+    size_t capacity = (size_t) (end - start);
+    uint8_t *digits = xmalloc(sizeof(uint8_t) * capacity);
+    size_t digits_length = 0;
+
+    for (const uint8_t *cursor = start; cursor < end; cursor++) {
+        if (*cursor != '_') {
+            digits[digits_length++] = pm_integer_parse_digit(*cursor);
+        }
+    }
+
+    // Every base other than 10 is treated as a power of two.
+    if (multiplier != 10) {
+        pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
+    } else {
+        pm_integer_parse_decimal(integer, digits, digits_length);
+    }
+
+    xfree(digits);
+}
+
+/**
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ *
+ * Only the bytes in [start, end) are read. If nothing is left after the sign
+ * and prefix have been skipped, the integer is not written to at all, so the
+ * caller is expected to pass in a zeroed pm_integer_t.
+ *
+ * @param integer The integer to parse into.
+ * @param base The base of the integer.
+ * @param start The start of the string.
+ * @param end The end of the string.
+ */
+void
+pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
+    // Ignore unary +. Unary - is parsed differently and will not end up here.
+    // Instead, it will modify the parsed integer later.
+    if (start < end && *start == '+') start++;
+
+    // Determine the multiplier from the base, and skip past any prefixes. Every
+    // read of *start is guarded so that we never look at a byte beyond end.
+    uint32_t multiplier = 10;
+    switch (base) {
+        case PM_INTEGER_BASE_DEFAULT:
+            // A literal made up entirely of zeros (e.g. "0") would otherwise
+            // run off the end of the token.
+            while (start < end && *start == '0') start++; // 01 -> 1
+            break;
+        case PM_INTEGER_BASE_BINARY:
+            start += 2; // 0b
+            multiplier = 2;
+            break;
+        case PM_INTEGER_BASE_OCTAL:
+            start++; // 0
+            if (start < end && (*start == '_' || *start == 'o' || *start == 'O')) start++; // o
+            multiplier = 8;
+            break;
+        case PM_INTEGER_BASE_DECIMAL:
+            if ((end - start) > 1 && *start == '0') start += 2; // 0d
+            break;
+        case PM_INTEGER_BASE_HEXADECIMAL:
+            start += 2; // 0x
+            multiplier = 16;
+            break;
+        case PM_INTEGER_BASE_UNKNOWN:
+            if ((end - start) > 1 && *start == '0') {
+                switch (start[1]) {
+                    case '_': start += 2; multiplier = 8; break;
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
+                    case 'b': case 'B': start += 2; multiplier = 2; break;
+                    case 'o': case 'O': start += 2; multiplier = 8; break;
+                    case 'd': case 'D': start += 2; break;
+                    case 'x': case 'X': start += 2; multiplier = 16; break;
+                    default: assert(false && "unreachable"); break;
+                }
+            }
+            break;
+    }
+
+    // It's possible that we've consumed everything at this point if there is an
+    // invalid integer. If this is the case, we'll just return 0.
+    if (start >= end) return;
+
+    const uint8_t *cursor = start;
+    uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++);
+
+    for (; cursor < end; cursor++) {
+        if (*cursor == '_') continue;
+
+        // value is at most UINT32_MAX here and the multiplier is at most 16,
+        // so this cannot overflow the 64-bit accumulator.
+        value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor);
+
+        if (value > UINT32_MAX) {
+            // If the integer is too large to fit into a single uint32_t, then
+            // we'll parse it as a big integer.
+            pm_integer_parse_big(integer, multiplier, start, end);
+            return;
+        }
+    }
+
+    integer->value = (uint32_t) value;
+}
+
+/**
+ * Compare two integers. The result is -1 when left is less than right, 0 when
+ * both are equal, and 1 when left is greater than right.
+ */
+int
+pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
+    // When the signs match, the ordering of the magnitudes is flipped for
+    // negative numbers. When they differ, the sign of left decides.
+    int sign = left->negative ? -1 : 1;
+    if (left->negative != right->negative) return sign;
+
+    // Ordering of the magnitudes: -1, 0 or 1.
+    int magnitude = 0;
+
+    if (left->values == NULL && right->values == NULL) {
+        // Both operands are stored in the embedded form.
+        magnitude = (left->value > right->value) - (left->value < right->value);
+    } else if (left->values == NULL || left->length < right->length) {
+        magnitude = -1;
+    } else if (right->values == NULL || left->length > right->length) {
+        magnitude = 1;
+    } else {
+        // Same number of words: compare from the most-significant word down
+        // and stop at the first difference.
+        for (size_t remaining = left->length; remaining > 0 && magnitude == 0; remaining--) {
+            uint32_t left_word = left->values[remaining - 1];
+            uint32_t right_word = right->values[remaining - 1];
+            magnitude = (left_word > right_word) - (left_word < right_word);
+        }
+    }
+
+    return magnitude * sign;
+}
+
+/**
+ * Reduce a ratio of integers to its simplest form by dividing both parts by
+ * their greatest common divisor.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
+    // Only ratios where both parts fit into a 32-bit integer are reduced. For
+    // anything larger this function is a no-op. In the future, we may consider
+    // reducing even the larger numbers, but for now we keep it simple.
+    if (numerator->length != 0 || denominator->length != 0) return;
+
+    // A numerator of 0 or a denominator of 1 is already in its simplest form.
+    if (numerator->value == 0 || denominator->value == 1) return;
+
+    // Euclid's algorithm: once the loop finishes, gcd holds the greatest common
+    // divisor of the numerator and denominator.
+    uint32_t gcd = numerator->value;
+    for (uint32_t rest = denominator->value; rest != 0;) {
+        uint32_t next = gcd % rest;
+        gcd = rest;
+        rest = next;
+    }
+
+    numerator->value /= gcd;
+    denominator->value /= gcd;
+}
+
+/**
+ * Convert an integer to a decimal string and append it to the buffer. A
+ * leading '-' is written for negative integers.
+ *
+ * @param buffer The buffer to append the string to.
+ * @param integer The integer to convert to a string. It is not modified.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
+    if (integer->negative) {
+        pm_buffer_append_byte(buffer, '-');
+    }
+
+    // If the integer fits into a single uint32_t, then we can just append the
+    // value directly to the buffer.
+    if (integer->values == NULL) {
+        pm_buffer_append_format(buffer, "%" PRIu32, integer->value);
+        return;
+    }
+
+    // If the integer is two uint32_t values, then we can | them together and
+    // append the result to the buffer.
+    if (integer->length == 2) {
+        const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32);
+        pm_buffer_append_format(buffer, "%" PRIu64, value);
+        return;
+    }
+
+    // Otherwise, first we'll convert the base from 1<<32 to 10**9.
+    pm_integer_t converted = { 0 };
+    pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000);
+
+    if (converted.values == NULL) {
+        pm_buffer_append_format(buffer, "%" PRIu32, converted.value);
+        pm_integer_free(&converted);
+        return;
+    }
+
+    // Allocate a buffer that we'll copy the decimal digits into. Every base
+    // 10**9 word contributes exactly nine digits.
+    size_t digits_length = converted.length * 9;
+    char *digits = xcalloc(digits_length, sizeof(char));
+    if (digits == NULL) {
+        // Do not leak the converted integer if the digit buffer could not be
+        // allocated.
+        pm_integer_free(&converted);
+        return;
+    }
+
+    // Pack bigdecimal to digits. Words are stored least-significant first, so
+    // word 0 fills the last nine characters of the buffer.
+    for (size_t value_index = 0; value_index < converted.length; value_index++) {
+        uint32_t value = converted.values[value_index];
+
+        for (size_t digit_index = 0; digit_index < 9; digit_index++) {
+            digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10);
+            value /= 10;
+        }
+    }
+
+    // Skip the zero padding of the most-significant word, but always keep at
+    // least one digit.
+    size_t start_offset = 0;
+    while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++;
+
+    // Finally, append the string to the buffer and free the digits.
+    pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
+    xfree(digits);
+    pm_integer_free(&converted);
+}
+
+/**
+ * Release the words owned by an integer. Integers that fit into a single
+ * uint32_t never allocate, in which case there is nothing to do.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_integer_free(pm_integer_t *integer) {
+    uint32_t *values = integer->values;
+    if (values == NULL) return;
+
+    xfree(values);
+}
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
new file mode 100644
index 0000000000..304665e620
--- /dev/null
+++ b/prism/util/pm_integer.h
@@ -0,0 +1,130 @@
+/**
+ * @file pm_integer.h
+ *
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_NUMBER_H
+#define PRISM_NUMBER_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_buffer.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * A structure that represents an arbitrary-sized integer. Small integers are
+ * stored inline in the value field; larger ones are stored as an allocated
+ * array of 32-bit words.
+ */
+typedef struct {
+    /**
+     * The number of allocated values. length is set to 0 if the integer fits
+     * into uint32_t.
+     */
+    size_t length;
+
+    /**
+     * List of 32-bit integers, least-significant word first. Set to NULL if
+     * the integer fits into uint32_t.
+     */
+    uint32_t *values;
+
+    /**
+     * Embedded value for small integer. This value is set to 0 if the value
+     * does not fit into uint32_t.
+     */
+    uint32_t value;
+
+    /**
+     * Whether or not the integer is negative. It is stored this way so that a
+     * zeroed pm_integer_t is always positive zero.
+     */
+    bool negative;
+} pm_integer_t;
+
+/**
+ * An enum controlling the base of an integer. It is expected that the base is
+ * already known before parsing the integer, even though it could be derived
+ * from the string itself.
+ */
+typedef enum {
+    /** The default decimal base, with no prefix. Leading 0s will be ignored. */
+    PM_INTEGER_BASE_DEFAULT,
+
+    /** The binary base, indicated by a 0b or 0B prefix. */
+    PM_INTEGER_BASE_BINARY,
+
+    /**
+     * The octal base, indicated by a 0, 0o, or 0O prefix. An underscore
+     * directly after the leading 0 is skipped along with the prefix.
+     */
+    PM_INTEGER_BASE_OCTAL,
+
+    /** The decimal base, indicated by a 0d, 0D, or empty prefix. */
+    PM_INTEGER_BASE_DECIMAL,
+
+    /** The hexadecimal base, indicated by a 0x or 0X prefix. */
+    PM_INTEGER_BASE_HEXADECIMAL,
+
+    /**
+     * An unknown base, in which case pm_integer_parse will derive it based on
+     * the content of the string. This is less efficient and does more
+     * comparisons, so if callers know the base ahead of time, they should use
+     * that instead.
+     */
+    PM_INTEGER_BASE_UNKNOWN
+} pm_integer_base_t;
+
+/**
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ *
+ * @param integer The integer to parse into.
+ * @param base The base of the integer.
+ * @param start The start of the string.
+ * @param end The end of the string.
+ */
+void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
+
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ *
+ * @param left The left integer to compare.
+ * @param right The right integer to compare.
+ * @return The result of the comparison.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
+/**
+ * Reduce a ratio of integers to its simplest form.
+ *
+ * If either the numerator or denominator do not fit into a 32-bit integer, then
+ * this function is a no-op. In the future, we may consider reducing even the
+ * larger numbers, but for now we're going to keep it simple.
+ *
+ * @param numerator The numerator of the ratio.
+ * @param denominator The denominator of the ratio.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
+
+/**
+ * Convert an integer to a decimal string.
+ *
+ * @param buffer The buffer to append the string to.
+ * @param integer The integer to convert to a string.
+ *
+ * \public \memberof pm_integer_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single uint32_t.
+ *
+ * @param integer The integer to free.
+ *
+ * \public \memberof pm_integer_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer);
+
+#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
new file mode 100644
index 0000000000..ad2294cd60
--- /dev/null
+++ b/prism/util/pm_list.c
@@ -0,0 +1,49 @@
+#include "prism/util/pm_list.h"
+
+/**
+ * Returns true if the given list has no nodes, i.e. if it has no head.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_list_empty_p(pm_list_t *list) {
+    const pm_list_node_t *first = list->head;
+    return first == NULL;
+}
+
+/**
+ * Returns the number of nodes that the list reports through its size field.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_list_size(pm_list_t *list) {
+    const size_t count = list->size;
+    return count;
+}
+
+/**
+ * Append a node to the end of the given list. The next pointer of the appended
+ * node itself is left as it is.
+ */
+void
+pm_list_append(pm_list_t *list, pm_list_node_t *node) {
+    // Link the node in after the current tail, or make it the head if the list
+    // is still empty.
+    if (list->head != NULL) {
+        list->tail->next = node;
+    } else {
+        list->head = node;
+    }
+
+    list->size++;
+    list->tail = node;
+}
+
+/**
+ * Deallocate the internal state of the given list. Every node reachable from
+ * the head is freed, and the list is reset to the empty state so that it can
+ * be safely queried or appended to again afterwards.
+ *
+ * @param list The list to free.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_list_free(pm_list_t *list) {
+    pm_list_node_t *node = list->head;
+
+    while (node != NULL) {
+        // Fetch the successor before the node is released.
+        pm_list_node_t *next = node->next;
+        xfree(node);
+        node = next;
+    }
+
+    // Drop the now-dangling pointers. Without this pm_list_empty_p would still
+    // report a non-empty list and pm_list_append would write through the freed
+    // tail.
+    list->head = NULL;
+    list->tail = NULL;
+    list->size = 0;
+}
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
new file mode 100644
index 0000000000..f544bb2943
--- /dev/null
+++ b/prism/util/pm_list.h
@@ -0,0 +1,103 @@
+/**
+ * @file pm_list.h
+ *
+ * An abstract linked list.
+ */
+#ifndef PRISM_LIST_H
+#define PRISM_LIST_H
+
+#include "prism/defines.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * This struct represents a node of an abstract linked list that provides
+ * common functionality. It is meant to be used any time a linked list is
+ * necessary to store data.
+ *
+ * The linked list itself operates off a set of pointers. Because the pointers
+ * are not necessarily sequential, they can be of any size. We use this fact to
+ * allow the consumer of this linked list to extend the node struct to include
+ * any data they want. This is done by using the pm_list_node_t as the first
+ * member of the struct.
+ *
+ * For example, if we want to store a list of integers, we can do the following:
+ *
+ * ```c
+ * typedef struct {
+ *     pm_list_node_t node;
+ *     int value;
+ * } pm_int_node_t;
+ *
+ * pm_list_t list = { 0 };
+ * pm_int_node_t *node = xcalloc(1, sizeof(pm_int_node_t));
+ * node->value = 5;
+ *
+ * pm_list_append(&list, &node->node);
+ * ```
+ *
+ * Note that pm_list_append does not touch the next pointer of the node being
+ * appended and pm_list_free follows next pointers until it reaches NULL, so the
+ * next pointer of a node must be NULL when it is appended (here it is zeroed by
+ * the allocation).
+ *
+ * The pm_list_t struct is used to represent the overall linked list. It
+ * contains a pointer to the head and tail of the list. This allows for easy
+ * iteration and appending of new nodes.
+ */
+typedef struct pm_list_node {
+    /** A pointer to the next node in the list. */
+    struct pm_list_node *next;
+} pm_list_node_t;
+
+/**
+ * This represents the overall linked list. It keeps a pointer to the head and
+ * tail so that iteration is easy and pushing new nodes is easy. A
+ * zero-initialized pm_list_t is a valid empty list.
+ */
+typedef struct {
+    /** The size of the list, incremented on every append. */
+    size_t size;
+
+    /** A pointer to the head of the list. NULL when the list is empty. */
+    pm_list_node_t *head;
+
+    /**
+     * A pointer to the tail of the list. It is only read when the head is not
+     * NULL.
+     */
+    pm_list_node_t *tail;
+} pm_list_t;
+
+/**
+ * Returns true if the given list is empty.
+ *
+ * @param list The list to check.
+ * @return True if the given list is empty, otherwise false.
+ *
+ * \public \memberof pm_list_t
+ */
+PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
+
+/**
+ * Returns the size of the list.
+ *
+ * @param list The list to check.
+ * @return The size of the list.
+ *
+ * \public \memberof pm_list_t
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
+
+/**
+ * Append a node to the given list.
+ *
+ * @param list The list to append to.
+ * @param node The node to append.
+ */
+void pm_list_append(pm_list_t *list, pm_list_node_t *node);
+
+/**
+ * Deallocate the internal state of the given list.
+ *
+ * @param list The list to free.
+ *
+ * \public \memberof pm_list_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
+
+#endif
diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c
new file mode 100644
index 0000000000..7ea20ace6d
--- /dev/null
+++ b/prism/util/pm_memchr.c
@@ -0,0 +1,35 @@
+#include "prism/util/pm_memchr.h"
+
+#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+
+/**
+ * A memchr replacement that is aware of multibyte encodings. When the encoding
+ * has changed to a multibyte one and the byte being searched for could also be
+ * the trailing byte of a multibyte character, the buffer is walked one
+ * character at a time so that trailing bytes are never matched. In every other
+ * case this simply defers to memchr.
+ */
+void *
+pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
+    // Fast path: the byte cannot be mistaken for a trailing byte.
+    if (!encoding_changed || !encoding->multibyte || character < PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
+        return memchr(memory, character, number);
+    }
+
+    const uint8_t *bytes = (const uint8_t *) memory;
+
+    for (size_t offset = 0; offset < number;) {
+        if (bytes[offset] == character) {
+            return (void *) (bytes + offset);
+        }
+
+        // Step over the whole character. A width of 0 means that the encoding
+        // could not determine one here, so the search is abandoned.
+        size_t width = encoding->char_width(bytes + offset, (ptrdiff_t) (number - offset));
+        if (width == 0) break;
+
+        offset += width;
+    }
+
+    return NULL;
+}
+
+#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
new file mode 100644
index 0000000000..e0671eaed3
--- /dev/null
+++ b/prism/util/pm_memchr.h
@@ -0,0 +1,29 @@
+/**
+ * @file pm_memchr.h
+ *
+ * A custom memchr implementation.
+ */
+#ifndef PRISM_MEMCHR_H
+#define PRISM_MEMCHR_H
+
+#include "prism/defines.h"
+#include "prism/encoding.h"
+
+#include <stddef.h>
+
+/**
+ * We need to roll our own memchr to handle cases where the encoding changes and
+ * we need to search for a character in a buffer that could be the trailing byte
+ * of a multibyte character.
+ *
+ * @param source The source string.
+ * @param character The character to search for.
+ * @param number The maximum number of bytes to search.
+ * @param encoding_changed Whether the encoding changed.
+ * @param encoding A pointer to the encoding.
+ * @return A pointer to the first occurrence of the character in the source
+ *     string, or NULL if no such character exists.
+ */
+void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
+
+#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
new file mode 100644
index 0000000000..8331618f54
--- /dev/null
+++ b/prism/util/pm_newline_list.c
@@ -0,0 +1,125 @@
+#include "prism/util/pm_newline_list.h"
+
+/**
+ * Set up a newline list with room for the given number of offsets. Returns
+ * false if the offsets could not be allocated, otherwise returns true.
+ */
+bool
+pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
+    size_t *offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
+    list->offsets = offsets;
+
+    if (offsets == NULL) {
+        return false;
+    }
+
+    list->start = start;
+    list->capacity = capacity;
+
+    // The first line of the file always starts at offset 0. That entry is
+    // already in place because the allocation is zeroed, so the list starts
+    // out with one element rather than none.
+    list->size = 1;
+
+    return true;
+}
+
+/**
+ * Forget every newline that has been appended to the list. Only the entry for
+ * the first line is kept, which is why the size goes back to 1 and not 0.
+ */
+void
+pm_newline_list_clear(pm_newline_list_t *list) {
+    const size_t first_line_only = 1;
+    list->size = first_line_only;
+}
+
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false. When
+ * false is returned the list is left exactly as it was.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the newline character whose following offset
+ *     should be recorded.
+ */
+bool
+pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
+    if (list->size == list->capacity) {
+        // Grow by half, but always by at least one slot. Rounding down would
+        // otherwise leave a capacity of 1 unchanged and the write below would
+        // land outside of the allocation.
+        size_t next_capacity = (list->capacity * 3) / 2;
+        if (next_capacity <= list->capacity) next_capacity = list->capacity + 1;
+
+        // Allocate into a temporary so that the existing offsets and capacity
+        // survive a failed allocation.
+        size_t *next_offsets = (size_t *) xcalloc(next_capacity, sizeof(size_t));
+        if (next_offsets == NULL) return false;
+
+        memcpy(next_offsets, list->offsets, list->size * sizeof(size_t));
+        xfree(list->offsets);
+
+        list->offsets = next_offsets;
+        list->capacity = next_capacity;
+    }
+
+    assert(*cursor == '\n');
+    assert(cursor >= list->start);
+
+    // The recorded offset is that of the byte following the newline, which is
+    // where the next line starts.
+    size_t newline_offset = (size_t) (cursor - list->start + 1);
+
+    assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
+    list->offsets[list->size++] = newline_offset;
+
+    return true;
+}
+
+/**
+ * Returns the line that the given cursor falls on, counting from start_line.
+ * If the cursor is not exactly at the start of a line, the line whose start is
+ * the closest one before the cursor is returned.
+ */
+int32_t
+pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
+    assert(cursor >= list->start);
+    const size_t offset = (size_t) (cursor - list->start);
+
+    // Binary search over the half-open range [low, high).
+    size_t low = 0;
+    size_t high = list->size;
+
+    while (low < high) {
+        const size_t middle = low + (high - low) / 2;
+        const size_t candidate = list->offsets[middle];
+
+        if (candidate == offset) {
+            return ((int32_t) middle) + start_line;
+        } else if (candidate < offset) {
+            low = middle + 1;
+        } else {
+            high = middle;
+        }
+    }
+
+    // low is now the index of the first line that starts after the offset, so
+    // the line containing the offset is the one before it.
+    return ((int32_t) low) + start_line - 1;
+}
+
+/**
+ * Returns the line and column that the given cursor falls on, with lines
+ * counted from start_line. If the cursor is not exactly at the start of a
+ * line, the line whose start is the closest one before the cursor is used and
+ * the column is the distance in bytes from that start.
+ */
+pm_line_column_t
+pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
+    assert(cursor >= list->start);
+    const size_t offset = (size_t) (cursor - list->start);
+
+    // Binary search over the half-open range [low, high).
+    size_t low = 0;
+    size_t high = list->size;
+
+    while (low < high) {
+        const size_t middle = low + (high - low) / 2;
+        const size_t candidate = list->offsets[middle];
+
+        if (candidate == offset) {
+            // Exactly at the start of a line, so the column is 0.
+            return ((pm_line_column_t) { ((int32_t) middle) + start_line, 0 });
+        } else if (candidate < offset) {
+            low = middle + 1;
+        } else {
+            high = middle;
+        }
+    }
+
+    // low is now the index of the first line that starts after the offset, so
+    // the line containing the offset is the one before it.
+    const size_t line_start = list->offsets[low - 1];
+
+    return ((pm_line_column_t) {
+        .line = ((int32_t) low) + start_line - 1,
+        .column = (uint32_t) (offset - line_start)
+    });
+}
+
+/**
+ * Release the array of offsets owned by the newline list. The list struct
+ * itself is not freed.
+ */
+void
+pm_newline_list_free(pm_newline_list_t *list) {
+    size_t *offsets = list->offsets;
+    xfree(offsets);
+}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
new file mode 100644
index 0000000000..406abe8ba5
--- /dev/null
+++ b/prism/util/pm_newline_list.h
@@ -0,0 +1,113 @@
+/**
+ * @file pm_newline_list.h
+ *
+ * A list of byte offsets of newlines in a string.
+ *
+ * When compiling the syntax tree, it's necessary to know the line and column
+ * of many nodes. This is necessary to support things like error messages,
+ * tracepoints, etc.
+ *
+ * It's possible that we could store the start line, start column, end line, and
+ * end column on every node in addition to the offsets that we already store,
+ * but that would be quite a lot of memory overhead.
+ */
+#ifndef PRISM_NEWLINE_LIST_H
+#define PRISM_NEWLINE_LIST_H
+
+#include "prism/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+/**
+ * A list of offsets of newlines in a string. The offsets are assumed to be
+ * sorted/inserted in ascending order.
+ */
+typedef struct {
+    /** A pointer to the start of the source string. */
+    const uint8_t *start;
+
+    /**
+     * The number of offsets in the list. This is 1 right after initialization
+     * or clearing, because the first line is always present at offset 0.
+     */
+    size_t size;
+
+    /** The capacity of the list that has been allocated. */
+    size_t capacity;
+
+    /**
+     * The list of offsets. Each entry is the byte offset from start at which
+     * a line begins.
+     */
+    size_t *offsets;
+} pm_newline_list_t;
+
+/**
+ * A line and column in a string.
+ */
+typedef struct {
+    /** The line number, counted from the start line given by the caller. */
+    int32_t line;
+
+    /** The column number, in bytes from the start of the line. */
+    uint32_t column;
+} pm_line_column_t;
+
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ *
+ * @param list The list to initialize.
+ * @param start A pointer to the start of the source string.
+ * @param capacity The initial capacity of the list.
+ * @return True if the allocation of the offsets succeeds, otherwise false.
+ */
+bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
+
+/**
+ * Clear out the newlines that have been appended to the list.
+ *
+ * @param list The list to clear.
+ */
+void
+pm_newline_list_clear(pm_newline_list_t *list);
+
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the offset to append.
+ * @return True if the reallocation of the offsets succeeds (if one was
+ *     necessary), otherwise false.
+ */
+bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
+
+/**
+ * Returns the line of the given offset. If the offset is not in the list, the
+ * line of the closest offset less than the given offset is returned.
+ *
+ * @param list The list to search.
+ * @param cursor A pointer to the offset to search for.
+ * @param start_line The line to start counting from.
+ * @return The line of the given offset.
+ */
+int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
+
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ *
+ * @param list The list to search.
+ * @param cursor A pointer to the offset to search for.
+ * @param start_line The line to start counting from.
+ * @return The line and column of the given offset.
+ */
+pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
+
+/**
+ * Free the internal memory allocated for the newline list.
+ *
+ * @param list The list to free.
+ */
+void pm_newline_list_free(pm_newline_list_t *list);
+
+#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
new file mode 100644
index 0000000000..a7493c468b
--- /dev/null
+++ b/prism/util/pm_string.c
@@ -0,0 +1,381 @@
+#include "prism/util/pm_string.h"
+
+static const uint8_t empty_source[] = ""; // Source used for strings built from empty files.
+
+/**
+ * Reports the size in bytes of pm_string_t so the FFI backend can allocate one.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_sizeof(void) {
+    const size_t size = sizeof(pm_string_t);
+    return size;
+}
+
+/**
+ * Set up a shared string that borrows the bytes in [start, end).
+ */
+void
+pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
+    assert(start <= end);
+
+    // Populate each field directly; the length is the distance between the
+    // two pointers, which the assertion above checks is not negative.
+    string->type = PM_STRING_SHARED;
+    string->source = start;
+    string->length = (size_t) (end - start);
+}
+
+/**
+ * Set up an owned string. The string takes over `source` and is the one
+ * responsible for freeing it.
+ */
+void
+pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
+    // Ownership of the buffer moves to the string from this point on.
+    string->type = PM_STRING_OWNED;
+    string->source = source;
+    string->length = length;
+}
+
+/**
+ * Set up a constant string, which refers to memory it does not own.
+ */
+void
+pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
+    // The bytes are only reinterpreted as uint8_t here, never copied.
+    const uint8_t *bytes = (const uint8_t *) source;
+    string->type = PM_STRING_CONSTANT;
+    string->source = bytes;
+    string->length = length;
+}
+
+#ifdef _WIN32
+/**
+ * Pairs an open Windows file handle with the heap-allocated wide-character
+ * path used to open it; both are released by pm_string_file_handle_close.
+ */
+typedef struct {
+    /** The wide-character path, allocated by pm_string_file_handle_open. */
+    WCHAR *path;
+
+    /** The handle from CreateFileW; unset until the open call assigns it. */
+    HANDLE file;
+} pm_string_file_handle_t;
+
+/**
+ * Open the UTF-8 filepath for reading on Windows via a wide-character copy.
+ * Every failure frees that copy, so callers only close the handle on success.
+ */
+static pm_string_init_result_t
+pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
+    int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
+    if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
+
+    handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
+    if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
+        xfree(handle->path);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
+    if (handle->file == INVALID_HANDLE_VALUE) {
+        pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
+        // An access-denied failure on a directory gets its own result code.
+        if (GetLastError() == ERROR_ACCESS_DENIED) {
+            DWORD attributes = GetFileAttributesW(handle->path);
+            if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+                result = PM_STRING_INIT_ERROR_DIRECTORY;
+            }
+        }
+
+        xfree(handle->path);
+        return result;
+    }
+
+    return PM_STRING_INIT_SUCCESS;
+}
+
+/**
+ * Free the wide-character path and then close the file handle.
+ */
+static void
+pm_string_file_handle_close(pm_string_file_handle_t *handle) {
+    xfree(handle->path);
+    CloseHandle(handle->file);
+}
+#endif
+
+/**
+ * Map the file indicated by the filepath parameter into memory and store its
+ * contents and size in the given `pm_string_t`. The given `pm_string_t` should
+ * be freed using `pm_string_free` when it is no longer used. An empty file
+ * yields a constant empty string rather than a mapping.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). On Windows this uses `MapViewOfFile`, with POSIX mapped
+ * files it uses `mmap`, and otherwise it defers to `pm_string_file_init`.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t
+pm_string_mapped_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    pm_string_file_handle_t handle;
+    pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
+    if (result != PM_STRING_INIT_SUCCESS) return result;
+
+    // Get the file size (low 32 bits only, since the high-order pointer is NULL).
+    DWORD file_size = GetFileSize(handle.file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (file_size == 0) {
+        pm_string_file_handle_close(&handle);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    // Create a mapping of the file.
+    HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (mapping == NULL) {
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Map the file into memory, then release both handles whatever the outcome.
+    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    CloseHandle(mapping);
+    pm_string_file_handle_close(&handle);
+
+    if (source == NULL) {
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
+    return PM_STRING_INIT_SUCCESS;
+#elif defined(_POSIX_MAPPED_FILES)
+    // Open the file for reading
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Ensure it is a file and not a directory
+    if (S_ISDIR(sb.st_mode)) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_DIRECTORY;
+    }
+
+    // Map the file contents, unless the file is empty and there is nothing to map
+    size_t size = (size_t) sb.st_size;
+    uint8_t *source = NULL;
+
+    if (size == 0) {
+        close(fd);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (source == MAP_FAILED) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    close(fd);
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
+    return PM_STRING_INIT_SUCCESS;
+#else
+    return pm_string_file_init(string, filepath);
+#endif
+}
+
+/**
+ * Read the file indicated by the filepath parameter into an allocated buffer
+ * and load its contents and size into the given `pm_string_t`, which should be
+ * freed using `pm_string_free`. Nothing is left allocated when this fails.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t
+pm_string_file_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    pm_string_file_handle_t handle;
+    pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
+    if (result != PM_STRING_INIT_SUCCESS) return result;
+
+    // Get the file size.
+    DWORD file_size = GetFileSize(handle.file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (file_size == 0) {
+        pm_string_file_handle_close(&handle);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    // Create a buffer to read the file into.
+    uint8_t *source = xmalloc(file_size);
+    if (source == NULL) {
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Read the contents of the file. A failed read and a short read are both
+    // errors, and in either case the buffer has to be released before returning.
+    DWORD bytes_read;
+    if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL) || bytes_read != file_size) {
+        xfree(source);
+        pm_string_file_handle_close(&handle);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    pm_string_file_handle_close(&handle);
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
+    return PM_STRING_INIT_SUCCESS;
+#elif defined(PRISM_HAS_FILESYSTEM)
+    // Open the file for reading
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // Ensure it is a file and not a directory
+    if (S_ISDIR(sb.st_mode)) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_DIRECTORY;
+    }
+
+    // Check the size to see if it's empty
+    size_t size = (size_t) sb.st_size;
+    if (size == 0) {
+        close(fd);
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = empty_source, .length = 0 };
+        return PM_STRING_INIT_SUCCESS;
+    }
+
+    uint8_t *source = xmalloc(size);
+    if (source == NULL) {
+        close(fd);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    // read(2) may return fewer bytes than requested, so keep reading until the
+    // buffer is full or the file ends; only the bytes read become the string.
+    size_t length = 0;
+    long bytes_read = 0;
+    while (length < size && (bytes_read = (long) read(fd, source + length, size - length)) > 0) {
+        length += (size_t) bytes_read;
+    }
+    close(fd);
+
+    if (bytes_read == -1) {
+        xfree(source);
+        return PM_STRING_INIT_ERROR_GENERIC;
+    }
+
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
+    return PM_STRING_INIT_SUCCESS;
+#else
+    (void) string;
+    (void) filepath;
+    perror("pm_string_file_init is not implemented for this platform");
+    return PM_STRING_INIT_ERROR_GENERIC;
+#endif
+}
+
+/**
+ * Make sure the string owns its memory. A string that is not already owned is
+ * turned into an owned one holding a fresh copy of its current bytes.
+ */
+void
+pm_string_ensure_owned(pm_string_t *string) {
+    if (string->type != PM_STRING_OWNED) {
+        const uint8_t *previous = pm_string_source(string);
+        size_t size = pm_string_length(string);
+
+        // If the allocation fails the string is left exactly as it was.
+        uint8_t *copy = xmalloc(size);
+        if (copy == NULL) return;
+        memcpy(copy, previous, size);
+        pm_string_owned_init(string, copy, size);
+    }
+}
+
+/**
+ * Order two strings, first by length and then by their bytes. The result is 0
+ * when the strings are equal, negative when the left string sorts before the
+ * right string, and positive when it sorts after. A shorter string always
+ * sorts before a longer one, whatever bytes the two contain.
+ */
+int
+pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
+    const size_t lhs_size = pm_string_length(left);
+    const size_t rhs_size = pm_string_length(right);
+
+    if (lhs_size != rhs_size) {
+        return (lhs_size < rhs_size) ? -1 : 1;
+    }
+
+    const uint8_t *lhs = pm_string_source(left);
+    const uint8_t *rhs = pm_string_source(right);
+    return memcmp(lhs, rhs, lhs_size);
+}
+
+/** Returns the length associated with the string, in bytes. */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_length(const pm_string_t *string) {
+    const size_t length = string->length;
+
+    return length;
+}
+
+/** Returns the pointer to the first byte of the string. */
+PRISM_EXPORTED_FUNCTION const uint8_t *
+pm_string_source(const pm_string_t *string) {
+    const uint8_t *start = string->source;
+
+    return start;
+}
+
+/**
+ * Free the memory owned by the string; constant and shared strings are no-ops.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_string_free(pm_string_t *string) {
+    void *memory = (void *) string->source;
+    // Owned buffers are freed; non-empty mapped files are unmapped instead.
+    if (string->type == PM_STRING_OWNED) {
+        xfree(memory);
+#ifdef PRISM_HAS_MMAP
+    } else if (string->type == PM_STRING_MAPPED && string->length) {
+#if defined(_WIN32)
+        UnmapViewOfFile(memory);
+#elif defined(_POSIX_MAPPED_FILES)
+        munmap(memory, string->length);
+#endif
+#endif /* PRISM_HAS_MMAP */
+    }
+}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
new file mode 100644
index 0000000000..d8456ff294
--- /dev/null
+++ b/prism/util/pm_string.h
@@ -0,0 +1,200 @@
+/**
+ * @file pm_string.h
+ *
+ * A generic string type that can have various ownership semantics.
+ */
+#ifndef PRISM_STRING_H
+#define PRISM_STRING_H
+
+#include "prism/defines.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// The following headers are necessary to read files using demand paging.
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(_POSIX_MAPPED_FILES)
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#elif defined(PRISM_HAS_FILESYSTEM)
+#include <fcntl.h>
+#include <sys/stat.h>
+#endif
+
+/**
+ * A generic string type that can have various ownership semantics.
+ */
+typedef struct {
+    /** A pointer to the first byte of the string, which need not be null-terminated. */
+    const uint8_t *source;
+
+    /** The length of the string in bytes. */
+    size_t length;
+
+    /** The type of the string. This field determines how the string should be freed. */
+    enum {
+        /** This string is a constant string, and should not be freed. */
+        PM_STRING_CONSTANT,
+
+        /** This is a slice of another string, and should not be freed. */
+        PM_STRING_SHARED,
+
+        /** This string owns its memory, which `pm_string_free()` releases with xfree. */
+        PM_STRING_OWNED,
+
+#ifdef PRISM_HAS_MMAP
+        /** This string is a memory-mapped file, which `pm_string_free()` unmaps. */
+        PM_STRING_MAPPED
+#endif
+    } type;
+} pm_string_t;
+
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ *
+ * @return The size of the pm_string_t struct.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
+
+/**
+ * Defines an empty string: a constant string with a NULL source and a length
+ * of zero. Useful for initializing a string that will be filled in later.
+ */
+#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
+
+/**
+ * Initialize a shared string that is based on initial input.
+ *
+ * @param string The string to initialize.
+ * @param start The start of the string.
+ * @param end The end of the string.
+ */
+void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
+
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string.
+ */
+void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
+
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string.
+ */
+void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
+
+/**
+ * Represents the result of calling pm_string_mapped_init or
+ * pm_string_file_init. We need this additional information because there is
+ * not a platform-agnostic way to indicate that the file that was attempted to
+ * be opened was a directory.
+ */
+typedef enum {
+    /** Indicates that the string was successfully initialized. */
+    PM_STRING_INIT_SUCCESS = 0,
+    /**
+     * Indicates a generic error from a pm_string_*_init function, where the
+     * type of error should be read from `errno` or `GetLastError()`.
+     */
+    PM_STRING_INIT_ERROR_GENERIC = 1,
+    /**
+     * Indicates that the file that was attempted to be opened was a directory.
+     */
+    PM_STRING_INIT_ERROR_DIRECTORY = 2
+} pm_string_init_result_t;
+
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on Windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ *
+ * @param string The string to initialize.
+ * @param filepath The filepath to read.
+ * @return The success of the read, indicated by the value of the enum.
+ *
+ * \public \memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
+
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * @param string The string to initialize.
+ * @param filepath The filepath to read.
+ * @return The success of the read, indicated by the value of the enum.
+ *
+ * \public \memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
+
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ *
+ * @param string The string to ensure is owned.
+ */
+void pm_string_ensure_owned(pm_string_t *string);
+
+/**
+ * Compare the underlying lengths and bytes of two strings. Returns 0 if the
+ * strings are equal, a negative number if the left string is less than the
+ * right string, and a positive number if the left string is greater than the
+ * right string.
+ *
+ * @param left The left string to compare.
+ * @param right The right string to compare.
+ * @return The comparison result.
+ */
+int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
+
+/**
+ * Returns the length associated with the string.
+ *
+ * @param string The string to get the length of.
+ * @return The length of the string.
+ *
+ * \public \memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
+
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @param string The string to get the start pointer of.
+ * @return The start pointer of the string.
+ *
+ * \public \memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
+
+/**
+ * Free the associated memory of the given string.
+ *
+ * @param string The string to free.
+ *
+ * \public \memberof pm_string_t
+ */
+PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c
new file mode 100644
index 0000000000..3f58421554
--- /dev/null
+++ b/prism/util/pm_strncasecmp.c
@@ -0,0 +1,36 @@
+#include "prism/util/pm_strncasecmp.h"
+
+/**
+ * A locale-insensitive version of `tolower(3)`.
+ */
+static inline int
+pm_tolower(int c)
+{
+    if ('A' <= c && c <= 'Z') {
+        return c | 0x20;
+    }
+    return c;
+}
+
+/**
+ * Compare two strings, ignoring case, up to the given length. Returns 0 if the
+ * strings are equal, a negative number if string1 is less than string2, or a
+ * positive number if string1 is greater than string2. A null byte ends either
+ * string, so a string that ends first is less than the one that continues.
+ *
+ * Note that this is effectively our own implementation of strncasecmp, which is
+ * not available on all of the platforms we want to support.
+ */
+int
+pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
+    for (size_t offset = 0; offset < length; offset++) {
+        // A string that ends early compares as less than the longer one, since
+        // its null terminator is smaller than any byte it is compared against.
+        int difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset]);
+        if (difference != 0) return difference;
+        // Both bytes are equal here, so a null byte ends both strings at once.
+        if (string1[offset] == '\0') break;
+    }
+
+    return 0;
+}
diff --git a/prism/util/pm_strncasecmp.h b/prism/util/pm_strncasecmp.h
new file mode 100644
index 0000000000..5cb88cb5eb
--- /dev/null
+++ b/prism/util/pm_strncasecmp.h
@@ -0,0 +1,32 @@
+/**
+ * @file pm_strncasecmp.h
+ *
+ * A custom strncasecmp implementation.
+ */
+#ifndef PRISM_STRNCASECMP_H
+#define PRISM_STRNCASECMP_H
+
+#include "prism/defines.h"
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Compare two strings, ignoring case, up to the given length. Returns 0 if the
+ * strings are equal, a negative number if string1 is less than string2, or a
+ * positive number if string1 is greater than string2.
+ *
+ * Note that this is effectively our own implementation of strncasecmp, but it's
+ * not available on all of the platforms we want to support so we're rolling it
+ * here.
+ *
+ * @param string1 The first string to compare.
+ * @param string2 The second string to compare.
+ * @param length The maximum number of characters to compare.
+ * @return 0 if the strings are equal, a negative number if string1 is less than
+ *     string2, or a positive number if string1 is greater than string2.
+ */
+int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
+
+#endif
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
new file mode 100644
index 0000000000..916a4cc3fd
--- /dev/null
+++ b/prism/util/pm_strpbrk.c
@@ -0,0 +1,206 @@
+#include "prism/util/pm_strpbrk.h"
+
+/** Report the bytes in [start, end) as an invalid multibyte character. */
+static inline void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    // The diagnostic is formatted with the first offending byte.
+    const uint8_t byte = *start;
+    pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, byte);
+}
+
+/**
+ * Lock the parser's explicit encoding to its current encoding, reporting a
+ * mixed-encoding error if a Unicode escape had already locked it to UTF-8.
+ */
+static inline void
+pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
+    // Nothing to check when no encoding is locked yet, or when the lock
+    // already matches the current encoding.
+    if (parser->explicit_encoding != NULL && parser->explicit_encoding != parser->encoding) {
+        if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            // A Unicode escape sequence was seen earlier and this conflicts.
+            pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+        } else {
+            // Should not be anything else.
+            assert(false && "unreachable");
+        }
+    }
+
+    parser->explicit_encoding = parser->encoding;
+}
+
+/**
+ * Default path for when the encoding is unchanged: scans using UTF-8 widths.
+ */
+static inline const uint8_t *
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    // Note strchr also matches charset's terminator, so a null byte is a match.
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * The path for ASCII-8BIT, where every byte is one character on its own.
+ */
+static inline const uint8_t *
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    for (size_t index = 0; index < maximum; index++) {
+        const uint8_t byte = source[index];
+
+        if (strchr((const char *) charset, byte) != NULL) {
+            return source + index;
+        }
+
+        // When validating, a high-bit byte locks in the explicit encoding.
+        if (validate && byte >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
+    }
+
+    return NULL;
+}
+
+/**
+ * Slow path for multibyte encodings, stepping by the encoding's char widths.
+ */
+static inline const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * Fast path for encodings that only have single-byte characters. The encoding
+ * is consulted only for bytes >= 0x80, and only when validate is set.
+ */
+static inline const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80 || !validate) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            pm_strpbrk_explicit_encoding_set(parser, source, width);
+
+            if (width > 0) {
+                index += width;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * A bounded replacement for the standard library strpbrk. The standard
+ * function has undefined behavior when the source string is not
+ * null-terminated, and pm_parse makes no promise that its input is (which lets
+ * the extension hand it the result of a call to mmap directly).
+ *
+ * The standard function also takes no maximum length. We need one for the
+ * reason above, and Ruby allows null bytes within strings, comments, regular
+ * expressions, etc., so the search is bounded by length rather than by a
+ * terminator.
+ *
+ * Finally, in some encodings a charset character can be the trailing byte of a
+ * multi-byte character (in Shift_JIS the backslash can be one), so for those
+ * encodings we take a slower path that advances one multi-byte character at a
+ * time. The scanner is picked from the parser's current encoding.
+ */
+const uint8_t *
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
+    if (length <= 0) return NULL;
+
+    const size_t maximum = (size_t) length;
+
+    if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, maximum, validate);
+    }
+    if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+        return pm_strpbrk_ascii_8bit(parser, source, charset, maximum, validate);
+    }
+    if (parser->encoding->multibyte) {
+        return pm_strpbrk_multi_byte(parser, source, charset, maximum, validate);
+    }
+    return pm_strpbrk_single_byte(parser, source, charset, maximum, validate);
+}
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
new file mode 100644
index 0000000000..f387bd5782
--- /dev/null
+++ b/prism/util/pm_strpbrk.h
@@ -0,0 +1,46 @@
+/**
+ * @file pm_strpbrk.h
+ *
+ * A custom strpbrk implementation.
+ */
+#ifndef PRISM_STRPBRK_H
+#define PRISM_STRPBRK_H
+
+#include "prism/defines.h"
+#include "prism/diagnostic.h"
+#include "prism/parser.h"
+
+#include <stddef.h>
+#include <string.h>
+
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ *
+ * @param parser The parser.
+ * @param source The source to search.
+ * @param charset The charset to search for.
+ * @param length The maximum number of bytes to search.
+ * @param validate Whether to validate that the source string is valid in the
+ *     current encoding of the parser.
+ * @return A pointer to the first character in the source string that is in the
+ *     charset, or NULL if no such character exists.
+ */
+const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
+
+#endif
diff --git a/prism/version.h b/prism/version.h
new file mode 100644
index 0000000000..0ef7435c17
--- /dev/null
+++ b/prism/version.h
@@ -0,0 +1,29 @@
+/**
+ * @file version.h
+ *
+ * The version of the Prism library.
+ */
+#ifndef PRISM_VERSION_H
+#define PRISM_VERSION_H
+
+/**
+ * The major version of the Prism library as an int.
+ */
+#define PRISM_VERSION_MAJOR 1
+
+/**
+ * The minor version of the Prism library as an int.
+ */
+#define PRISM_VERSION_MINOR 8
+
+/**
+ * The patch version of the Prism library as an int.
+ */
+#define PRISM_VERSION_PATCH 0
+
+/**
+ * The version of the Prism library as a constant string.
+ */
+#define PRISM_VERSION "1.8.0"
+
+#endif