20 files changed, 3611 insertions, 0 deletions
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
new file mode 100644
index 0000000000..1e79e46718
--- /dev/null
+++ b/prism/util/pm_buffer.c
@@ -0,0 +1,317 @@
+#include "prism/util/pm_buffer.h"
+
+/**
+ * Return the size of the pm_buffer_t struct.
+ */
+size_t
+pm_buffer_sizeof(void) {
+    return sizeof(pm_buffer_t);
+}
+
+/**
+ * Initialize a pm_buffer_t with the given capacity.
+ */
+bool
+pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
+    buffer->length = 0;
+    buffer->capacity = capacity;
+
+    buffer->value = (char *) xmalloc(capacity);
+    return buffer->value != NULL;
+}
+
+/**
+ * Initialize a pm_buffer_t with its default values.
+ */
+bool
+pm_buffer_init(pm_buffer_t *buffer) {
+    return pm_buffer_init_capacity(buffer, 1024);
+}
+
+/**
+ * Return the value of the buffer.
+ */
+char *
+pm_buffer_value(const pm_buffer_t *buffer) {
+    return buffer->value;
+}
+
+/**
+ * Return the length of the buffer.
+ */
+size_t
+pm_buffer_length(const pm_buffer_t *buffer) {
+    return buffer->length;
+}
+
+/**
+ * Append the given amount of space to the buffer.
+ */
+static inline bool
+pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
+    size_t next_length = buffer->length + length;
+
+    if (next_length > buffer->capacity) {
+        if (buffer->capacity == 0) {
+            buffer->capacity = 1;
+        }
+
+        while (next_length > buffer->capacity) {
+            buffer->capacity *= 2;
+        }
+
+        buffer->value = xrealloc(buffer->value, buffer->capacity);
+        if (buffer->value == NULL) return false;
+    }
+
+    buffer->length = next_length;
+    return true;
+}
+
+/**
+ * Append a generic pointer to memory to the buffer.
+ */
+static inline void
+pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
+    size_t cursor = buffer->length;
+    if (pm_buffer_append_length(buffer, length)) {
+        memcpy(buffer->value + cursor, source, length);
+    }
+}
+
+/**
+ * Append the given amount of space as zeroes to the buffer.
+ */
+void
+pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
+    size_t cursor = buffer->length;
+    if (pm_buffer_append_length(buffer, length)) {
+        memset(buffer->value + cursor, 0, length);
+    }
+}
+
+/**
+ * Append a formatted string to the buffer.
+ */
+void
+pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) {
+    va_list arguments;
+    va_start(arguments, format);
+    int result = vsnprintf(NULL, 0, format, arguments);
+    va_end(arguments);
+
+    if (result < 0) return;
+    size_t length = (size_t) (result + 1);
+
+    size_t cursor = buffer->length;
+    if (pm_buffer_append_length(buffer, length)) {
+        va_start(arguments, format);
+        vsnprintf(buffer->value + cursor, length, format, arguments);
+        va_end(arguments);
+        buffer->length--;
+    }
+}
+
+/**
+ * Append a string to the buffer.
+ */
+void
+pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length) {
+    pm_buffer_append(buffer, value, length);
+}
+
+/**
+ * Append a list of bytes to the buffer.
+ */
+void
+pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
+    pm_buffer_append(buffer, (const char *) value, length);
+}
+
+/**
+ * Append a single byte to the buffer.
+ */
+void
+pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
+    const void *source = &value;
+    pm_buffer_append(buffer, source, sizeof(uint8_t));
+}
+
+/**
+ * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
+ */
+void
+pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
+    if (value < 128) {
+        pm_buffer_append_byte(buffer, (uint8_t) value);
+    } else {
+        uint32_t n = value;
+        while (n >= 128) {
+            pm_buffer_append_byte(buffer, (uint8_t) (n | 128));
+            n >>= 7;
+        }
+        pm_buffer_append_byte(buffer, (uint8_t) n);
+    }
+}
+
+/**
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
+ */
+void
+pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
+    uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31));
+    pm_buffer_append_varuint(buffer, unsigned_int);
+}
+
+/**
+ * Append a double to the buffer.
+ */
+void
+pm_buffer_append_double(pm_buffer_t *buffer, double value) {
+    const void *source = &value;
+    pm_buffer_append(buffer, source, sizeof(double));
+}
+
+/**
+ * Append a slice of source code to the buffer.
+ */
+void
+pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) {
+    for (size_t index = 0; index < length; index++) {
+        const uint8_t byte = source[index];
+
+        if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) {
+            if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+                pm_buffer_append_format(buffer, "\\x%02X", byte);
+            } else {
+                pm_buffer_append_format(buffer, "\\u%04X", byte);
+            }
+        } else {
+            switch (byte) {
+                case '\a':
+                    if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+                        pm_buffer_append_string(buffer, "\\a", 2);
+                    } else {
+                        pm_buffer_append_format(buffer, "\\u%04X", byte);
+                    }
+                    break;
+                case '\b':
+                    pm_buffer_append_string(buffer, "\\b", 2);
+                    break;
+                case '\t':
+                    pm_buffer_append_string(buffer, "\\t", 2);
+                    break;
+                case '\n':
+                    pm_buffer_append_string(buffer, "\\n", 2);
+                    break;
+                case '\v':
+                    if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+                        pm_buffer_append_string(buffer, "\\v", 2);
+                    } else {
+                        pm_buffer_append_format(buffer, "\\u%04X", byte);
+                    }
+                    break;
+                case '\f':
+                    pm_buffer_append_string(buffer, "\\f", 2);
+                    break;
+                case '\r':
+                    pm_buffer_append_string(buffer, "\\r", 2);
+                    break;
+                case '"':
+                    pm_buffer_append_string(buffer, "\\\"", 2);
+                    break;
+                case '#': {
+                    if (escaping == PM_BUFFER_ESCAPING_RUBY && index + 1 < length) {
+                        const uint8_t next_byte = source[index + 1];
+                        if (next_byte == '{' || next_byte == '@' || next_byte == '$') {
+                            pm_buffer_append_byte(buffer, '\\');
+                        }
+                    }
+
+                    pm_buffer_append_byte(buffer, '#');
+                    break;
+                }
+                case '\\':
+                    pm_buffer_append_string(buffer, "\\\\", 2);
+                    break;
+                default:
+                    pm_buffer_append_byte(buffer, byte);
+                    break;
+            }
+        }
+    }
+}
+
+/**
+ * Prepend the given string to the buffer.
+ */
+void
+pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
+    size_t cursor = buffer->length;
+    if (pm_buffer_append_length(buffer, length)) {
+        memmove(buffer->value + length, buffer->value, cursor);
+        memcpy(buffer->value, value, length);
+    }
+}
+
+/**
+ * Concatenate one buffer onto another.
+ */
+void
+pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source) {
+    if (source->length > 0) {
+        pm_buffer_append(destination, source->value, source->length);
+    }
+}
+
+/**
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ */
+void
+pm_buffer_clear(pm_buffer_t *buffer) {
+    buffer->length = 0;
+}
+
+/**
+ * Strip the whitespace from the end of the buffer.
+ */
+void
+pm_buffer_rstrip(pm_buffer_t *buffer) {
+    while (buffer->length > 0 && pm_char_is_whitespace((uint8_t) buffer->value[buffer->length - 1])) {
+        buffer->length--;
+    }
+}
+
+/**
+ * Checks if the buffer includes the given value.
+ */
+size_t
+pm_buffer_index(const pm_buffer_t *buffer, char value) {
+    const char *first = memchr(buffer->value, value, buffer->length);
+    return (first == NULL) ? SIZE_MAX : (size_t) (first - buffer->value);
+}
+
+/**
+ * Insert the given string into the buffer at the given index.
+ */
+void
+pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) {
+    assert(index <= buffer->length);
+
+    if (index == buffer->length) {
+        pm_buffer_append_string(buffer, value, length);
+    } else {
+        pm_buffer_append_zeroes(buffer, length);
+        memmove(buffer->value + index + length, buffer->value + index, buffer->length - length - index);
+        memcpy(buffer->value + index, value, length);
+    }
+}
+
+/**
+ * Free the memory associated with the buffer.
+ */
+void
+pm_buffer_free(pm_buffer_t *buffer) {
+    xfree(buffer->value);
+}
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
new file mode 100644
index 0000000000..58f7b506cf
--- /dev/null
+++ b/prism/util/pm_buffer.h
@@ -0,0 +1,218 @@
+/**
+ * @file pm_buffer.h
+ *
+ * A wrapper around a contiguous block of allocated memory.
+ */
+#ifndef PRISM_BUFFER_H
+#define PRISM_BUFFER_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_char.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * A pm_buffer_t is a simple memory buffer that stores data in a contiguous
+ * block of memory.
+ */
+typedef struct {
+    /** The length of the buffer in bytes. */
+    size_t length;
+
+    /** The capacity of the buffer in bytes that has been allocated. */
+    size_t capacity;
+
+    /** A pointer to the start of the buffer. */
+    char *value;
+} pm_buffer_t;
+
+/**
+ * Return the size of the pm_buffer_t struct.
+ *
+ * @returns The size of the pm_buffer_t struct.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
+
+/**
+ * Initialize a pm_buffer_t with the given capacity.
+ *
+ * @param buffer The buffer to initialize.
+ * @param capacity The capacity of the buffer.
+ * @returns True if the buffer was initialized successfully, false otherwise.
+ */
+bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
+
+/**
+ * Initialize a pm_buffer_t with its default values.
+ *
+ * @param buffer The buffer to initialize.
+ * @returns True if the buffer was initialized successfully, false otherwise.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
+
+/**
+ * Return the value of the buffer.
+ *
+ * @param buffer The buffer to get the value of.
+ * @returns The value of the buffer.
+ */
+PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer);
+
+/**
+ * Return the length of the buffer.
+ *
+ * @param buffer The buffer to get the length of.
+ * @returns The length of the buffer.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer);
+
+/**
+ * Append the given amount of space as zeroes to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param length The amount of space to append and zero.
+ */
+void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
+
+/**
+ * Append a formatted string to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param format The format string to append.
+ * @param ... The arguments to the format string.
+ */
+void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
+
+/**
+ * Append a string to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The string to append.
+ * @param length The length of the string to append.
+ */
+void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/**
+ * Append a list of bytes to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The bytes to append.
+ * @param length The length of the bytes to append.
+ */
+void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
+
+/**
+ * Append a single byte to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The byte to append.
+ */
+void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
+
+/**
+ * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The integer to append.
+ */
+void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
+
+/**
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The integer to append.
+ */
+void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
+
+/**
+ * Append a double to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The double to append.
+ */
+void pm_buffer_append_double(pm_buffer_t *buffer, double value);
+
+/**
+ * The different types of escaping that can be performed by the buffer when
+ * appending a slice of Ruby source code.
+ */
+typedef enum {
+    PM_BUFFER_ESCAPING_RUBY,
+    PM_BUFFER_ESCAPING_JSON
+} pm_buffer_escaping_t;
+
+/**
+ * Append a slice of source code to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param source The source code to append.
+ * @param length The length of the source code to append.
+ * @param escaping The type of escaping to perform.
+ */
+void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
+
+/**
+ * Prepend the given string to the buffer.
+ *
+ * @param buffer The buffer to prepend to.
+ * @param value The string to prepend.
+ * @param length The length of the string to prepend.
+ */
+void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/**
+ * Concatenate one buffer onto another.
+ *
+ * @param destination The buffer to concatenate onto.
+ * @param source The buffer to concatenate.
+ */
+void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
+
+/**
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ *
+ * @param buffer The buffer to clear.
+ */
+void pm_buffer_clear(pm_buffer_t *buffer);
+
+/**
+ * Strip the whitespace from the end of the buffer.
+ *
+ * @param buffer The buffer to strip.
+ */
+void pm_buffer_rstrip(pm_buffer_t *buffer);
+
+/**
+ * Checks if the buffer includes the given value.
+ *
+ * @param buffer The buffer to check.
+ * @param value The value to check for.
+ * @returns The index of the first occurrence of the value in the buffer, or
+ *   SIZE_MAX if the value is not found.
+ */
+size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/**
+ * Insert the given string into the buffer at the given index.
+ *
+ * @param buffer The buffer to insert into.
+ * @param index The index to insert at.
+ * @param value The string to insert.
+ * @param length The length of the string to insert.
+ */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+/**
+ * Free the memory associated with the buffer.
+ *
+ * @param buffer The buffer to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
+
+#endif
diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c
new file mode 100644
index 0000000000..dce19abd1b
--- /dev/null
+++ b/prism/util/pm_char.c
@@ -0,0 +1,318 @@
+#include "prism/util/pm_char.h"
+
+#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
+#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
+#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
+
+#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
+#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
+#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
+#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
+#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
+#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
+#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
+#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
+
+static const uint8_t pm_byte_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
+    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x
+    0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+static const uint8_t pm_number_table[256] = {
+    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
+    0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
+    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
+    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
+};
+
+/**
+ * Returns the number of characters at the start of the string that match the
+ * given kind. Disallows searching past the given maximum number of characters.
+ */
+static inline size_t
+pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
+    return size;
+}
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * whitespace. Disallows searching past the given maximum number of characters.
+ */
+size_t
+pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * whitespace while also tracking the location of each newline. Disallows
+ * searching past the given maximum number of characters.
+ */
+size_t
+pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
+        if (string[size] == '\n') {
+            pm_newline_list_append(newline_list, string + size);
+        }
+
+        size++;
+    }
+
+    return size;
+}
+
+/**
+ * Returns the number of characters at the start of the string that are inline
+ * whitespace. Disallows searching past the given maximum number of characters.
+ */
+size_t
+pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are regexp
+ * options. Disallows searching past the given maximum number of characters.
+ */
+size_t
+pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
+}
+
+/**
+ * Returns true if the given character matches the given kind.
+ */
+static inline bool
+pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
+    return (pm_byte_table[b] & kind) != 0;
+}
+
+/**
+ * Returns true if the given character is a whitespace character.
+ */
+bool
+pm_char_is_whitespace(const uint8_t b) {
+    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
+}
+
+/**
+ * Returns true if the given character is an inline whitespace character.
+ */
+bool
+pm_char_is_inline_whitespace(const uint8_t b) {
+    return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+/**
+ * Scan through the string and return the number of characters at the start of
+ * the string that match the given kind. Disallows searching past the given
+ * maximum number of characters.
+ */
+static inline size_t
+pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
+    return size;
+}
+
+/**
+ * Scan through the string and return the number of characters at the start of
+ * the string that match the given kind. Disallows searching past the given
+ * maximum number of characters.
+ *
+ * Additionally, report the location of the last invalid underscore character
+ * found in the string through the out invalid parameter.
+ */
+static inline size_t
+pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    bool underscore = false;
+    while (size < maximum && (pm_number_table[string[size]] & kind)) {
+        if (string[size] == '_') {
+            if (underscore) *invalid = string + size;
+            underscore = true;
+        } else {
+            underscore = false;
+        }
+
+        size++;
+    }
+
+    if (string[size - 1] == '_') *invalid = string + size - 1;
+    return size;
+}
+
+/**
+ * Returns the number of characters at the start of the string that are binary
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t
+pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are octal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t
+pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits. Disallows searching past the given maximum number of characters.
+ */
+size_t
+pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore
+ */
+size_t
+pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits. Disallows searching past the given maximum number of
+ * characters.
+ */
+size_t
+pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
+    return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits or underscores. Disallows searching past the given maximum
+ * number of characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t
+pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
+}
+
+/**
+ * Returns true if the given character matches the given kind.
+ */
+static inline bool
+pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
+    return (pm_number_table[b] & kind) != 0;
+}
+
+/**
+ * Returns true if the given character is a binary digit.
+ */
+bool
+pm_char_is_binary_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
+}
+
+/**
+ * Returns true if the given character is an octal digit.
+ */
+bool
+pm_char_is_octal_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
+}
+
+/**
+ * Returns true if the given character is a decimal digit.
+ */
+bool
+pm_char_is_decimal_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+/**
+ * Returns true if the given character is a hexadecimal digit.
+ */
+bool
+pm_char_is_hexadecimal_digit(const uint8_t b) {
+    return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+#undef PRISM_CHAR_BIT_WHITESPACE
+#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
+#undef PRISM_CHAR_BIT_REGEXP_OPTION
+
+#undef PRISM_NUMBER_BIT_BINARY_DIGIT
+#undef PRISM_NUMBER_BIT_BINARY_NUMBER
+#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
+#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
+#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
+#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
+#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
+#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
new file mode 100644
index 0000000000..deeafd6321
--- /dev/null
+++ b/prism/util/pm_char.h
@@ -0,0 +1,204 @@
+/**
+ * @file pm_char.h
+ *
+ * Functions for working with characters and strings.
+ */
+#ifndef PRISM_CHAR_H
+#define PRISM_CHAR_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_newline_list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * whitespace. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are
+ *     whitespace.
+ */
+size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * whitespace while also tracking the location of each newline. Disallows
+ * searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param newline_list The list of newlines to populate.
+ * @return The number of characters at the start of the string that are
+ *     whitespace.
+ */
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
+
+/**
+ * Returns the number of characters at the start of the string that are inline
+ * whitespace. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are inline
+ *     whitespace.
+ */
+size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are decimal
+ *     digits.
+ */
+size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are
+ *     hexadecimal digits.
+ */
+size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are octal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the index of the first invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are octal
+ *     digits or underscores.
+ */
+size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns the number of characters at the start of the string that are decimal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the index of the first invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are decimal
+ *     digits or underscores.
+ */
+size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits or underscores. Disallows searching past the given maximum
+ * number of characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the index of the first invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are
+ *     hexadecimal digits or underscores.
+ */
+size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns the number of characters at the start of the string that are regexp
+ * options. Disallows searching past the given maximum number of characters.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @return The number of characters at the start of the string that are regexp
+ *     options.
+ */
+size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
+
+/**
+ * Returns the number of characters at the start of the string that are binary
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ *
+ * @param string The string to search.
+ * @param length The maximum number of characters to search.
+ * @param invalid The pointer to set to the index of the first invalid
+ *     underscore.
+ * @return The number of characters at the start of the string that are binary
+ *     digits or underscores.
+ */
+size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/**
+ * Returns true if the given character is a whitespace character.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a whitespace character.
+ */
+bool pm_char_is_whitespace(const uint8_t b);
+
+/**
+ * Returns true if the given character is an inline whitespace character.
+ *
+ * @param b The character to check.
+ * @return True if the given character is an inline whitespace character.
+ */
+bool pm_char_is_inline_whitespace(const uint8_t b);
+
+/**
+ * Returns true if the given character is a binary digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a binary digit.
+ */
+bool pm_char_is_binary_digit(const uint8_t b);
+
+/**
+ * Returns true if the given character is an octal digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is an octal digit.
+ */
+bool pm_char_is_octal_digit(const uint8_t b);
+
+/**
+ * Returns true if the given character is a decimal digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a decimal digit.
+ */
+bool pm_char_is_decimal_digit(const uint8_t b);
+
+/**
+ * Returns true if the given character is a hexadecimal digit.
+ *
+ * @param b The character to check.
+ * @return True if the given character is a hexadecimal digit.
+ */
+bool pm_char_is_hexadecimal_digit(const uint8_t b);
+
+#endif
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
new file mode 100644
index 0000000000..624002cec9
--- /dev/null
+++ b/prism/util/pm_constant_pool.c
@@ -0,0 +1,338 @@
+#include "prism/util/pm_constant_pool.h"
+
+/**
+ * Initialize a list of constant ids.
+ */
+void
+pm_constant_id_list_init(pm_constant_id_list_t *list) {
+    list->ids = NULL;
+    list->size = 0;
+    list->capacity = 0;
+}
+
+/**
+ * Initialize a list of constant ids with a given capacity.
+ */
+void
+pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
+    list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
+    if (list->ids == NULL) abort();
+
+    list->size = 0;
+    list->capacity = capacity;
+}
+
+/**
+ * Append a constant id to a list of constant ids. Returns false if any
+ * potential reallocations fail.
+ */
+bool
+pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
+    if (list->size >= list->capacity) {
+        list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
+        list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
+        if (list->ids == NULL) return false;
+    }
+
+    list->ids[list->size++] = id;
+    return true;
+}
+
+/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ */
+void
+pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
+    assert(index < list->capacity);
+    assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
+
+    list->ids[index] = id;
+    list->size++;
+}
+
+/**
+ * Checks if the current constant id list includes the given constant id.
+ */
+bool
+pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
+    for (size_t index = 0; index < list->size; index++) {
+        if (list->ids[index] == id) return true;
+    }
+    return false;
+}
+
+/**
+ * Free the memory associated with a list of constant ids.
+ */
+void
+pm_constant_id_list_free(pm_constant_id_list_t *list) {
+    if (list->ids != NULL) {
+        xfree(list->ids);
+    }
+}
+
+/**
+ * A relatively simple hash function (djb2) that is used to hash strings. We are
+ * optimizing here for simplicity and speed.
+ */
+static inline uint32_t
+pm_constant_pool_hash(const uint8_t *start, size_t length) {
+    // This is a prime number used as the initial value for the hash function.
+    uint32_t value = 5381;
+
+    for (size_t index = 0; index < length; index++) {
+        value = ((value << 5) + value) + start[index];
+    }
+
+    return value;
+}
+
+/**
+ * https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+ */
+static uint32_t
+next_power_of_two(uint32_t v) {
+    // Avoid underflow in subtraction on next line.
+    if (v == 0) {
+        // 1 is the nearest power of 2 to 0 (2^0)
+        return 1;
+    }
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+}
+
+#ifndef NDEBUG
+static bool
+is_power_of_two(uint32_t size) {
+    return (size & (size - 1)) == 0;
+}
+#endif
+
+/**
+ * Resize a constant pool to a given capacity.
+ */
+static inline bool
+pm_constant_pool_resize(pm_constant_pool_t *pool) {
+    assert(is_power_of_two(pool->capacity));
+
+    uint32_t next_capacity = pool->capacity * 2;
+    if (next_capacity < pool->capacity) return false;
+
+    const uint32_t mask = next_capacity - 1;
+    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
+
+    void *next = xcalloc(next_capacity, element_size);
+    if (next == NULL) return false;
+
+    pm_constant_pool_bucket_t *next_buckets = next;
+    pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
+
+    // For each bucket in the current constant pool, find the index in the
+    // next constant pool, and insert it.
+    for (uint32_t index = 0; index < pool->capacity; index++) {
+        pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
+
+        // If an id is set on this constant, then we know we have content here.
+        // In this case we need to insert it into the next constant pool.
+        if (bucket->id != PM_CONSTANT_ID_UNSET) {
+            uint32_t next_index = bucket->hash & mask;
+
+            // This implements linear scanning to find the next available slot
+            // in case this index is already taken. We don't need to bother
+            // comparing the values since we know that the hash is unique.
+            while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
+                next_index = (next_index + 1) & mask;
+            }
+
+            // Here we copy over the entire bucket, which includes the id so
+            // that they are consistent between resizes.
+            next_buckets[next_index] = *bucket;
+        }
+    }
+
+    // The constants are stable with respect to hash table resizes.
+    memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
+
+    // pool->constants and pool->buckets are allocated out of the same chunk
+    // of memory, with the buckets coming first.
+    xfree(pool->buckets);
+    pool->constants = next_constants;
+    pool->buckets = next_buckets;
+    pool->capacity = next_capacity;
+    return true;
+}
+
+/**
+ * Initialize a new constant pool with a given capacity.
+ */
+bool
+pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
+    const uint32_t maximum = (~((uint32_t) 0));
+    if (capacity >= ((maximum / 2) + 1)) return false;
+
+    capacity = next_power_of_two(capacity);
+    const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
+    void *memory = xcalloc(capacity, element_size);
+    if (memory == NULL) return false;
+
+    pool->buckets = memory;
+    pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
+    pool->size = 0;
+    pool->capacity = capacity;
+    return true;
+}
+
+/**
+ * Return a pointer to the constant indicated by the given constant id.
+ */
+pm_constant_t *
+pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
+    assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
+    return &pool->constants[constant_id - 1];
+}
+
+/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ */
+pm_constant_id_t
+pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    assert(is_power_of_two(pool->capacity));
+    const uint32_t mask = pool->capacity - 1;
+
+    uint32_t hash = pm_constant_pool_hash(start, length);
+    uint32_t index = hash & mask;
+    pm_constant_pool_bucket_t *bucket;
+
+    while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
+        pm_constant_t *constant = &pool->constants[bucket->id - 1];
+        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+            return bucket->id;
+        }
+
+        index = (index + 1) & mask;
+    }
+
+    return PM_CONSTANT_ID_UNSET;
+}
+
+/**
+ * Insert a constant into a constant pool and return its index in the pool.
+ */
+static inline pm_constant_id_t
+pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
+    if (pool->size >= (pool->capacity / 4 * 3)) {
+        if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
+    }
+
+    assert(is_power_of_two(pool->capacity));
+    const uint32_t mask = pool->capacity - 1;
+
+    uint32_t hash = pm_constant_pool_hash(start, length);
+    uint32_t index = hash & mask;
+    pm_constant_pool_bucket_t *bucket;
+
+    while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
+        // If there is a collision, then we need to check if the content is the
+        // same as the content we are trying to insert. If it is, then we can
+        // return the id of the existing constant.
+        pm_constant_t *constant = &pool->constants[bucket->id - 1];
+
+        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+            // Since we have found a match, we need to check if this is
+            // attempting to insert a shared or an owned constant. We want to
+            // prefer shared constants since they don't require allocations.
+            if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+                // If we're attempting to insert an owned constant and we have
+                // an existing constant, then either way we don't want the given
+                // memory. Either it's duplicated with the existing constant or
+                // it's not necessary because we have a shared version.
+                xfree((void *) start);
+            } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+                // If we're attempting to insert a shared constant and the
+                // existing constant is owned, then we can free the owned
+                // constant and replace it with the shared constant.
+                xfree((void *) constant->start);
+                constant->start = start;
+                bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
+            }
+
+            return bucket->id;
+        }
+
+        index = (index + 1) & mask;
+    }
+
+    // IDs are allocated starting at 1, since the value 0 denotes a non-existent
+    // constant.
+    uint32_t id = ++pool->size;
+    assert(pool->size < ((uint32_t) (1 << 30)));
+
+    *bucket = (pm_constant_pool_bucket_t) {
+        .id = (unsigned int) (id & 0x3fffffff),
+        .type = (unsigned int) (type & 0x3),
+        .hash = hash
+    };
+
+    pool->constants[id - 1] = (pm_constant_t) {
+        .start = start,
+        .length = length,
+    };
+
+    return id;
+}
+
+/**
+ * Insert a constant into a constant pool. Returns the id of the constant, or
+ * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
+ */
+pm_constant_id_t
+pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
+}
+
+/**
+ * Insert a constant into a constant pool from memory that is now owned by the
+ * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
+ * potential calls to resize fail.
+ */
+pm_constant_id_t
+pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
+}
+
+/**
+ * Insert a constant into a constant pool from memory that is constant. Returns
+ * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
+ * resize fail.
+ */
+pm_constant_id_t
+pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
+}
+
+/**
+ * Free the memory associated with a constant pool.
+ */
+void
+pm_constant_pool_free(pm_constant_pool_t *pool) {
+    // For each constant in the current constant pool, free the contents if the
+    // contents are owned.
+    for (uint32_t index = 0; index < pool->capacity; index++) {
+        pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
+
+        // If an id is set on this constant, then we know we have content here.
+        if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+            pm_constant_t *constant = &pool->constants[bucket->id - 1];
+            xfree((void *) constant->start);
+        }
+    }
+
+    xfree(pool->buckets);
+}
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
new file mode 100644
index 0000000000..6df23f8f50
--- /dev/null
+++ b/prism/util/pm_constant_pool.h
@@ -0,0 +1,218 @@
+/**
+ * @file pm_constant_pool.h
+ *
+ * A data structure that stores a set of strings.
+ *
+ * Each string is assigned a unique id, which can be used to compare strings for
+ * equality. This comparison ends up being much faster than strcmp, since it
+ * only requires a single integer comparison.
+ */
+#ifndef PRISM_CONSTANT_POOL_H
+#define PRISM_CONSTANT_POOL_H
+
+#include "prism/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * When we allocate constants into the pool, we reserve 0 to mean that the slot
+ * is not yet filled. This constant is reused in other places to indicate the
+ * lack of a constant id.
+ */
+#define PM_CONSTANT_ID_UNSET 0
+
+/**
+ * A constant id is a unique identifier for a constant in the constant pool.
+ */
+typedef uint32_t pm_constant_id_t;
+
+/**
+ * A list of constant IDs. Usually used to represent a set of locals.
+ */
+typedef struct {
+    /** The number of constant ids in the list. */
+    size_t size;
+
+    /** The number of constant ids that have been allocated in the list. */
+    size_t capacity;
+
+    /** The constant ids in the list. */
+    pm_constant_id_t *ids;
+} pm_constant_id_list_t;
+
+/**
+ * Initialize a list of constant ids.
+ *
+ * @param list The list to initialize.
+ */
+void pm_constant_id_list_init(pm_constant_id_list_t *list);
+
+/**
+ * Initialize a list of constant ids with a given capacity.
+ *
+ * @param list The list to initialize.
+ * @param capacity The initial capacity of the list.
+ */
+void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
+
+/**
+ * Append a constant id to a list of constant ids. Returns false if any
+ * potential reallocations fail.
+ *
+ * @param list The list to append to.
+ * @param id The id to append.
+ * @return Whether the append succeeded.
+ */
+bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
+
+/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ *
+ * @param list The list to insert into.
+ * @param index The index at which to insert.
+ * @param id The id to insert.
+ */
+void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
+
+/**
+ * Checks if the current constant id list includes the given constant id.
+ *
+ * @param list The list to check.
+ * @param id The id to check for.
+ * @return Whether the list includes the given id.
+ */
+bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
+
+/**
+ * Free the memory associated with a list of constant ids.
+ *
+ * @param list The list to free.
+ */
+void pm_constant_id_list_free(pm_constant_id_list_t *list);
+
+/**
+ * The type of bucket in the constant pool hash map. This determines how the
+ * bucket should be freed.
+ */
+typedef unsigned int pm_constant_pool_bucket_type_t;
+
+/** By default, each constant is a slice of the source. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
+
+/** An owned constant is one for which memory has been allocated. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
+
+/** A constant constant is known at compile time. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
+
+/** A bucket in the hash map. */
+typedef struct {
+    /** The incremental ID used for indexing back into the pool. */
+    unsigned int id: 30;
+
+    /** The type of the bucket, which determines how to free it. */
+    pm_constant_pool_bucket_type_t type: 2;
+
+    /** The hash of the bucket. */
+    uint32_t hash;
+} pm_constant_pool_bucket_t;
+
+/** A constant in the pool which effectively stores a string. */
+typedef struct {
+    /** A pointer to the start of the string. */
+    const uint8_t *start;
+
+    /** The length of the string. */
+    size_t length;
+} pm_constant_t;
+
+/** The overall constant pool, which stores constants found while parsing. */
+typedef struct {
+    /** The buckets in the hash map. */
+    pm_constant_pool_bucket_t *buckets;
+
+    /** The constants that are stored in the buckets. */
+    pm_constant_t *constants;
+
+    /** The number of buckets in the hash map. */
+    uint32_t size;
+
+    /** The number of buckets that have been allocated in the hash map. */
+    uint32_t capacity;
+} pm_constant_pool_t;
+
+/**
+ * Initialize a new constant pool with a given capacity.
+ *
+ * @param pool The pool to initialize.
+ * @param capacity The initial capacity of the pool.
+ * @return Whether the initialization succeeded.
+ */
+bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
+
+/**
+ * Return a pointer to the constant indicated by the given constant id.
+ *
+ * @param pool The pool to get the constant from.
+ * @param constant_id The id of the constant to get.
+ * @return A pointer to the constant.
+ */
+pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
+
+/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ *
+ * @param pool The pool to find the constant in.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
+ * Insert a constant into a constant pool that is a slice of a source string.
+ * Returns the id of the constant, or 0 if any potential calls to resize fail.
+ *
+ * @param pool The pool to insert the constant into.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
+ * Insert a constant into a constant pool from memory that is now owned by the
+ * constant pool. Returns the id of the constant, or 0 if any potential calls to
+ * resize fail.
+ *
+ * @param pool The pool to insert the constant into.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
+
+/**
+ * Insert a constant into a constant pool from memory that is constant. Returns
+ * the id of the constant, or 0 if any potential calls to resize fail.
+ *
+ * @param pool The pool to insert the constant into.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/**
+ * Free the memory associated with a constant pool.
+ *
+ * @param pool The pool to free.
+ */
+void pm_constant_pool_free(pm_constant_pool_t *pool);
+
+#endif
diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c
new file mode 100644
index 0000000000..5b0f34465c
--- /dev/null
+++ b/prism/util/pm_integer.c
@@ -0,0 +1,670 @@
+#include "prism/util/pm_integer.h"
+
+/**
+ * Pull out the length and values from the integer, regardless of the form in
+ * which the length/values are stored.
+ */
+#define INTEGER_EXTRACT(integer, length_variable, values_variable) \
+    if ((integer)->values == NULL) { \
+        length_variable = 1; \
+        values_variable = &(integer)->value; \
+    } else { \
+        length_variable = (integer)->length; \
+        values_variable = (integer)->values; \
+    }
+
+/**
+ * Adds two positive pm_integer_t with the given base.
+ * Return pm_integer_t with values allocated. Not normalized.
+ */
+static void
+big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
+    size_t left_length;
+    uint32_t *left_values;
+    INTEGER_EXTRACT(left, left_length, left_values)
+
+    size_t right_length;
+    uint32_t *right_values;
+    INTEGER_EXTRACT(right, right_length, right_values)
+
+    size_t length = left_length < right_length ? right_length : left_length;
+    uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * (length + 1));
+    if (values == NULL) return;
+
+    uint64_t carry = 0;
+    for (size_t index = 0; index < length; index++) {
+        uint64_t sum = carry + (index < left_length ? left_values[index] : 0) + (index < right_length ? right_values[index] : 0);
+        values[index] = (uint32_t) (sum % base);
+        carry = sum / base;
+    }
+
+    if (carry > 0) {
+        values[length] = (uint32_t) carry;
+        length++;
+    }
+
+    *destination = (pm_integer_t) { 0, length, values, false };
+}
+
+/**
+ * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
+ * base. Assume a, b, c, a - b - c all to be positive.
+ * Return pm_integer_t with values allocated. Not normalized.
+ */
+static void
+big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) {
+    size_t a_length;
+    uint32_t *a_values;
+    INTEGER_EXTRACT(a, a_length, a_values)
+
+    size_t b_length;
+    uint32_t *b_values;
+    INTEGER_EXTRACT(b, b_length, b_values)
+
+    size_t c_length;
+    uint32_t *c_values;
+    INTEGER_EXTRACT(c, c_length, c_values)
+
+    uint32_t *values = (uint32_t*) xmalloc(sizeof(uint32_t) * a_length);
+    int64_t carry = 0;
+
+    for (size_t index = 0; index < a_length; index++) {
+        int64_t sub = (
+            carry +
+            a_values[index] -
+            (index < b_length ? b_values[index] : 0) -
+            (index < c_length ? c_values[index] : 0)
+        );
+
+        if (sub >= 0) {
+            values[index] = (uint32_t) sub;
+            carry = 0;
+        } else {
+            sub += 2 * (int64_t) base;
+            values[index] = (uint32_t) ((uint64_t) sub % base);
+            carry = sub / (int64_t) base - 2;
+        }
+    }
+
+    while (a_length > 1 && values[a_length - 1] == 0) a_length--;
+    *destination = (pm_integer_t) { 0, a_length, values, false };
+}
+
+/**
+ * Multiply two positive integers with the given base using karatsuba algorithm.
+ * Return pm_integer_t with values allocated. Not normalized.
+ */
+static void
+karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
+    size_t left_length;
+    uint32_t *left_values;
+    INTEGER_EXTRACT(left, left_length, left_values)
+
+    size_t right_length;
+    uint32_t *right_values;
+    INTEGER_EXTRACT(right, right_length, right_values)
+
+    if (left_length > right_length) {
+        size_t temporary_length = left_length;
+        left_length = right_length;
+        right_length = temporary_length;
+
+        uint32_t *temporary_values = left_values;
+        left_values = right_values;
+        right_values = temporary_values;
+    }
+
+    if (left_length <= 10) {
+        size_t length = left_length + right_length;
+        uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+        if (values == NULL) return;
+
+        for (size_t left_index = 0; left_index < left_length; left_index++) {
+            uint32_t carry = 0;
+            for (size_t right_index = 0; right_index < right_length; right_index++) {
+                uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry;
+                values[left_index + right_index] = (uint32_t) (product % base);
+                carry = (uint32_t) (product / base);
+            }
+            values[left_index + right_length] = carry;
+        }
+
+        while (length > 1 && values[length - 1] == 0) length--;
+        *destination = (pm_integer_t) { 0, length, values, false };
+        return;
+    }
+
+    if (left_length * 2 <= right_length) {
+        uint32_t *values = (uint32_t *) xcalloc(left_length + right_length, sizeof(uint32_t));
+
+        for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) {
+            size_t end_offset = start_offset + left_length;
+            if (end_offset > right_length) end_offset = right_length;
+
+            pm_integer_t sliced_left = {
+                .value = 0,
+                .length = left_length,
+                .values = left_values,
+                .negative = false
+            };
+
+            pm_integer_t sliced_right = {
+                .value = 0,
+                .length = end_offset - start_offset,
+                .values = right_values + start_offset,
+                .negative = false
+            };
+
+            pm_integer_t product;
+            karatsuba_multiply(&product, &sliced_left, &sliced_right, base);
+
+            uint32_t carry = 0;
+            for (size_t index = 0; index < product.length; index++) {
+                uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry;
+                values[start_offset + index] = (uint32_t) (sum % base);
+                carry = (uint32_t) (sum / base);
+            }
+
+            if (carry > 0) values[start_offset + product.length] += carry;
+            pm_integer_free(&product);
+        }
+
+        *destination = (pm_integer_t) { 0, left_length + right_length, values, false };
+        return;
+    }
+
+    size_t half = left_length / 2;
+    pm_integer_t x0 = { 0, half, left_values, false };
+    pm_integer_t x1 = { 0, left_length - half, left_values + half, false };
+    pm_integer_t y0 = { 0, half, right_values, false };
+    pm_integer_t y1 = { 0, right_length - half, right_values + half, false };
+
+    pm_integer_t z0 = { 0 };
+    karatsuba_multiply(&z0, &x0, &y0, base);
+
+    pm_integer_t z2 = { 0 };
+    karatsuba_multiply(&z2, &x1, &y1, base);
+
+    // For simplicity to avoid considering negative values,
+    // use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm.
+    pm_integer_t x01 = { 0 };
+    big_add(&x01, &x0, &x1, base);
+
+    pm_integer_t y01 = { 0 };
+    big_add(&y01, &y0, &y1, base);
+
+    pm_integer_t xy = { 0 };
+    karatsuba_multiply(&xy, &x01, &y01, base);
+
+    pm_integer_t z1;
+    big_sub2(&z1, &xy, &z0, &z2, base);
+
+    size_t length = left_length + right_length;
+    uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t));
+
+    assert(z0.values != NULL);
+    memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
+
+    assert(z2.values != NULL);
+    memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
+
+    uint32_t carry = 0;
+    for(size_t index = 0; index < z1.length; index++) {
+        uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index];
+        values[index + half] = (uint32_t) (sum % base);
+        carry = (uint32_t) (sum / base);
+    }
+
+    for(size_t index = half + z1.length; carry > 0; index++) {
+        uint64_t sum = (uint64_t) carry + values[index];
+        values[index] = (uint32_t) (sum % base);
+        carry = (uint32_t) (sum / base);
+    }
+
+    while (length > 1 && values[length - 1] == 0) length--;
+    pm_integer_free(&z0);
+    pm_integer_free(&z1);
+    pm_integer_free(&z2);
+    pm_integer_free(&x01);
+    pm_integer_free(&y01);
+    pm_integer_free(&xy);
+
+    *destination = (pm_integer_t) { 0, length, values, false };
+}
+
+/**
+ * The values of a hexadecimal digit, where the index is the ASCII character.
+ * Note that there's an odd exception here where _ is mapped to 0. This is
+ * because it's possible for us to end up trying to parse a number that has
+ * already had an error attached to it, and we want to provide _something_ to
+ * the user.
+ */
+static const int8_t pm_integer_parse_digit_values[256] = {
+//   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0, // 5x
+    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx
+};
+
+/**
+ * Return the value of a hexadecimal digit in a uint8_t.
+ */
+static uint8_t
+pm_integer_parse_digit(const uint8_t character) {
+    int8_t value = pm_integer_parse_digit_values[character];
+    assert(value != -1 && "invalid digit");
+
+    return (uint8_t) value;
+}
+
+/**
+ * Create a pm_integer_t from uint64_t with the given base. It is assumed that
+ * the memory for the pm_integer_t pointer has been zeroed.
+ */
+static void
+pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) {
+    if (value < base) {
+        integer->value = (uint32_t) value;
+        return;
+    }
+
+    size_t length = 0;
+    uint64_t length_value = value;
+    while (length_value > 0) {
+        length++;
+        length_value /= base;
+    }
+
+    uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length);
+    if (values == NULL) return;
+
+    for (size_t value_index = 0; value_index < length; value_index++) {
+        values[value_index] = (uint32_t) (value % base);
+        value /= base;
+    }
+
+    integer->length = length;
+    integer->values = values;
+}
+
+/**
+ * Normalize pm_integer_t.
+ * Heading zero values will be removed. If the integer fits into uint32_t,
+ * values is set to NULL, length is set to 0, and value field will be used.
+ */
+static void
+pm_integer_normalize(pm_integer_t *integer) {
+    if (integer->values == NULL) {
+        return;
+    }
+
+    while (integer->length > 1 && integer->values[integer->length - 1] == 0) {
+        integer->length--;
+    }
+
+    if (integer->length > 1) {
+        return;
+    }
+
+    uint32_t value = integer->values[0];
+    bool negative = integer->negative && value != 0;
+
+    pm_integer_free(integer);
+    *integer = (pm_integer_t) { .value = value, .length = 0, .values = NULL, .negative = negative };
+}
+
+/**
+ * Convert base of the integer.
+ * In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9.
+ */
+static void
+pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) {
+    size_t source_length;
+    const uint32_t *source_values;
+    INTEGER_EXTRACT(source, source_length, source_values)
+
+    size_t bigints_length = (source_length + 1) / 2;
+    assert(bigints_length > 0);
+
+    pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t));
+    if (bigints == NULL) return;
+
+    for (size_t index = 0; index < source_length; index += 2) {
+        uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0);
+        pm_integer_from_uint64(&bigints[index / 2], value, base_to);
+    }
+
+    pm_integer_t base = { 0 };
+    pm_integer_from_uint64(&base, base_from, base_to);
+
+    while (bigints_length > 1) {
+        pm_integer_t next_base;
+        karatsuba_multiply(&next_base, &base, &base, base_to);
+
+        pm_integer_free(&base);
+        base = next_base;
+
+        size_t next_length = (bigints_length + 1) / 2;
+        pm_integer_t *next_bigints = (pm_integer_t *) xcalloc(next_length, sizeof(pm_integer_t));
+
+        for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) {
+            if (bigints_index + 1 == bigints_length) {
+                next_bigints[bigints_index / 2] = bigints[bigints_index];
+            } else {
+                pm_integer_t multiplied = { 0 };
+                karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to);
+
+                big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to);
+                pm_integer_free(&bigints[bigints_index]);
+                pm_integer_free(&bigints[bigints_index + 1]);
+                pm_integer_free(&multiplied);
+            }
+        }
+
+        xfree(bigints);
+        bigints = next_bigints;
+        bigints_length = next_length;
+    }
+
+    *destination = bigints[0];
+    destination->negative = source->negative;
+    pm_integer_normalize(destination);
+
+    xfree(bigints);
+    pm_integer_free(&base);
+}
+
+#undef INTEGER_EXTRACT
+
+/**
+ * Convert digits to integer with the given power-of-two base.
+ */
+static void
+pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) {
+    size_t bit = 1;
+    while (base > (uint32_t) (1 << bit)) bit++;
+
+    size_t length = (digits_length * bit + 31) / 32;
+    uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+
+    for (size_t digit_index = 0; digit_index < digits_length; digit_index++) {
+        size_t bit_position = bit * (digits_length - digit_index - 1);
+        uint32_t value = digits[digit_index];
+
+        size_t index = bit_position / 32;
+        size_t shift = bit_position % 32;
+
+        values[index] |= value << shift;
+        if (32 - shift < bit) values[index + 1] |= value >> (32 - shift);
+    }
+
+    while (length > 1 && values[length - 1] == 0) length--;
+    *integer = (pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false };
+    pm_integer_normalize(integer);
+}
+
+/**
+ * Convert decimal digits to pm_integer_t.
+ */
+static void
+pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
+    const size_t batch = 9;
+    size_t length = (digits_length + batch - 1) / batch;
+
+    uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
+    uint32_t value = 0;
+
+    for (size_t digits_index = 0; digits_index < digits_length; digits_index++) {
+        value = value * 10 + digits[digits_index];
+
+        size_t reverse_index = digits_length - digits_index - 1;
+        if (reverse_index % batch == 0) {
+            values[reverse_index / batch] = value;
+            value = 0;
+        }
+    }
+
+    // Convert base from 10**9 to 1<<32.
+    pm_integer_convert_base(integer, &((pm_integer_t) { .value = 0, .length = length, .values = values, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
+    xfree(values);
+}
+
+/**
+ * Parse a large integer from a string that does not fit into uint32_t.
+ */
+static void
+pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
+    // Allocate an array to store digits.
+    uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
+    size_t digits_length = 0;
+
+    for (; start < end; start++) {
+        if (*start == '_') continue;
+        digits[digits_length++] = pm_integer_parse_digit(*start);
+    }
+
+    // Construct pm_integer_t from the digits.
+    if (multiplier == 10) {
+        pm_integer_parse_decimal(integer, digits, digits_length);
+    } else {
+        pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
+    }
+
+    xfree(digits);
+}
+
+/**
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ */
+void
+pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
+    // Ignore unary +. Unary - is parsed differently and will not end up here.
+    // Instead, it will modify the parsed integer later.
+    if (*start == '+') start++;
+
+    // Determine the multiplier from the base, and skip past any prefixes.
+    uint32_t multiplier = 10;
+    switch (base) {
+        case PM_INTEGER_BASE_DEFAULT:
+            while (*start == '0') start++; // 01 -> 1
+            break;
+        case PM_INTEGER_BASE_BINARY:
+            start += 2; // 0b
+            multiplier = 2;
+            break;
+        case PM_INTEGER_BASE_OCTAL:
+            start++; // 0
+            if (*start == '_' || *start == 'o' || *start == 'O') start++; // o
+            multiplier = 8;
+            break;
+        case PM_INTEGER_BASE_DECIMAL:
+            if (*start == '0' && (end - start) > 1) start += 2; // 0d
+            break;
+        case PM_INTEGER_BASE_HEXADECIMAL:
+            start += 2; // 0x
+            multiplier = 16;
+            break;
+        case PM_INTEGER_BASE_UNKNOWN:
+            if (*start == '0' && (end - start) > 1) {
+                switch (start[1]) {
+                    case '_': start += 2; multiplier = 8; break;
+                    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
+                    case 'b': case 'B': start += 2; multiplier = 2; break;
+                    case 'o': case 'O': start += 2; multiplier = 8; break;
+                    case 'd': case 'D': start += 2; break;
+                    case 'x': case 'X': start += 2; multiplier = 16; break;
+                    default: assert(false && "unreachable"); break;
+                }
+            }
+            break;
+    }
+
+    // It's possible that we've consumed everything at this point if there is an
+    // invalid integer. If this is the case, we'll just return 0.
+    if (start >= end) return;
+
+    const uint8_t *cursor = start;
+    uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++);
+
+    for (; cursor < end; cursor++) {
+        if (*cursor == '_') continue;
+        value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor);
+
+        if (value > UINT32_MAX) {
+            // If the integer is too large to fit into a single uint32_t, then
+            // we'll parse it as a big integer.
+            pm_integer_parse_big(integer, multiplier, start, end);
+            return;
+        }
+    }
+
+    integer->value = (uint32_t) value;
+}
+
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ */
+int
+pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
+    if (left->negative != right->negative) return left->negative ? -1 : 1;
+    int negative = left->negative ? -1 : 1;
+
+    if (left->values == NULL && right->values == NULL) {
+        if (left->value < right->value) return -1 * negative;
+        if (left->value > right->value) return 1 * negative;
+        return 0;
+    }
+
+    if (left->values == NULL || left->length < right->length) return -1 * negative;
+    if (right->values == NULL || left->length > right->length) return 1 * negative;
+
+    for (size_t index = 0; index < left->length; index++) {
+        size_t value_index = left->length - index - 1;
+        uint32_t left_value = left->values[value_index];
+        uint32_t right_value = right->values[value_index];
+
+        if (left_value < right_value) return -1 * negative;
+        if (left_value > right_value) return 1 * negative;
+    }
+
+    return 0;
+}
+
+/**
+ * Reduce a ratio of integers to its simplest form.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
+    // If either the numerator or denominator do not fit into a 32-bit integer,
+    // then this function is a no-op. In the future, we may consider reducing
+    // even the larger numbers, but for now we're going to keep it simple.
+    if (
+        // If the numerator doesn't fit into a 32-bit integer, return early.
+        numerator->length != 0 ||
+        // If the denominator doesn't fit into a 32-bit integer, return early.
+        denominator->length != 0 ||
+        // If the numerator is 0, then return early.
+        numerator->value == 0 ||
+        // If the denominator is 1, then return early.
+        denominator->value == 1
+    ) return;
+
+    // Find the greatest common divisor of the numerator and denominator.
+    uint32_t divisor = numerator->value;
+    uint32_t remainder = denominator->value;
+
+    while (remainder != 0) {
+        uint32_t temporary = remainder;
+        remainder = divisor % remainder;
+        divisor = temporary;
+    }
+
+    // Divide the numerator and denominator by the greatest common divisor.
+    numerator->value /= divisor;
+    denominator->value /= divisor;
+}
+
+/**
+ * Convert an integer to a decimal string.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
+    if (integer->negative) {
+        pm_buffer_append_byte(buffer, '-');
+    }
+
+    // If the integer fits into a single uint32_t, then we can just append the
+    // value directly to the buffer.
+    if (integer->values == NULL) {
+        pm_buffer_append_format(buffer, "%" PRIu32, integer->value);
+        return;
+    }
+
+    // If the integer is two uint32_t values, then we can | them together and
+    // append the result to the buffer.
+    if (integer->length == 2) {
+        const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32);
+        pm_buffer_append_format(buffer, "%" PRIu64, value);
+        return;
+    }
+
+    // Otherwise, first we'll convert the base from 1<<32 to 10**9.
+    pm_integer_t converted = { 0 };
+    pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000);
+
+    if (converted.values == NULL) {
+        pm_buffer_append_format(buffer, "%" PRIu32, converted.value);
+        pm_integer_free(&converted);
+        return;
+    }
+
+    // Allocate a buffer that we'll copy the decimal digits into.
+    size_t digits_length = converted.length * 9;
+    char *digits = xcalloc(digits_length, sizeof(char));
+    if (digits == NULL) return;
+
+    // Pack bigdecimal to digits.
+    for (size_t value_index = 0; value_index < converted.length; value_index++) {
+        uint32_t value = converted.values[value_index];
+
+        for (size_t digit_index = 0; digit_index < 9; digit_index++) {
+            digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10);
+            value /= 10;
+        }
+    }
+
+    size_t start_offset = 0;
+    while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++;
+
+    // Finally, append the string to the buffer and free the digits.
+    pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
+    xfree(digits);
+    pm_integer_free(&converted);
+}
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single uint32_t.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_integer_free(pm_integer_t *integer) {
+    if (integer->values) {
+        xfree(integer->values);
+    }
+}
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
new file mode 100644
index 0000000000..91b28ad2f3
--- /dev/null
+++ b/prism/util/pm_integer.h
@@ -0,0 +1,126 @@
+/**
+ * @file pm_integer.h
+ *
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_NUMBER_H
+#define PRISM_NUMBER_H
+
+#include "prism/defines.h"
+#include "prism/util/pm_buffer.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * A structure represents an arbitrary-sized integer.
+ */
+typedef struct {
+    /**
+     * Embedded value for small integer. This value is set to 0 if the value
+     * does not fit into uint32_t.
+     */
+    uint32_t value;
+
+    /**
+     * The number of allocated values. length is set to 0 if the integer fits
+     * into uint32_t.
+     */
+    size_t length;
+
+    /**
+     * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
+     */
+    uint32_t *values;
+
+    /**
+     * Whether or not the integer is negative. It is stored this way so that a
+     * zeroed pm_integer_t is always positive zero.
+     */
+    bool negative;
+} pm_integer_t;
+
+/**
+ * An enum controlling the base of an integer. It is expected that the base is
+ * already known before parsing the integer, even though it could be derived
+ * from the string itself.
+ */
+typedef enum {
+    /** The default decimal base, with no prefix. Leading 0s will be ignored. */
+    PM_INTEGER_BASE_DEFAULT,
+
+    /** The binary base, indicated by a 0b or 0B prefix. */
+    PM_INTEGER_BASE_BINARY,
+
+    /** The octal base, indicated by a 0, 0o, or 0O prefix. */
+    PM_INTEGER_BASE_OCTAL,
+
+    /** The decimal base, indicated by a 0d, 0D, or empty prefix. */
+    PM_INTEGER_BASE_DECIMAL,
+
+    /** The hexadecimal base, indicated by a 0x or 0X prefix. */
+    PM_INTEGER_BASE_HEXADECIMAL,
+
+    /**
+     * An unknown base, in which case pm_integer_parse will derive it based on
+     * the content of the string. This is less efficient and does more
+     * comparisons, so if callers know the base ahead of time, they should use
+     * that instead.
+     */
+    PM_INTEGER_BASE_UNKNOWN
+} pm_integer_base_t;
+
+/**
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ *
+ * @param integer The integer to parse into.
+ * @param base The base of the integer.
+ * @param start The start of the string.
+ * @param end The end of the string.
+ */
+void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
+
+/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ *
+ * @param left The left integer to compare.
+ * @param right The right integer to compare.
+ * @return The result of the comparison.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
+/**
+ * Reduce a ratio of integers to its simplest form.
+ *
+ * If either the numerator or denominator do not fit into a 32-bit integer, then
+ * this function is a no-op. In the future, we may consider reducing even the
+ * larger numbers, but for now we're going to keep it simple.
+ *
+ * @param numerator The numerator of the ratio.
+ * @param denominator The denominator of the ratio.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
+
+/**
+ * Convert an integer to a decimal string.
+ *
+ * @param buffer The buffer to append the string to.
+ * @param integer The integer to convert to a string.
+ */
+PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single node in the linked list.
+ *
+ * @param integer The integer to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer);
+
+#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
new file mode 100644
index 0000000000..ad2294cd60
--- /dev/null
+++ b/prism/util/pm_list.c
@@ -0,0 +1,49 @@
+#include "prism/util/pm_list.h"
+
+/**
+ * Returns true if the given list is empty.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_list_empty_p(pm_list_t *list) {
+    return list->head == NULL;
+}
+
+/**
+ * Returns the size of the list.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_list_size(pm_list_t *list) {
+    return list->size;
+}
+
+/**
+ * Append a node to the given list.
+ */
+void
+pm_list_append(pm_list_t *list, pm_list_node_t *node) {
+    if (list->head == NULL) {
+        list->head = node;
+    } else {
+        list->tail->next = node;
+    }
+
+    list->tail = node;
+    list->size++;
+}
+
+/**
+ * Deallocate the internal state of the given list.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_list_free(pm_list_t *list) {
+    pm_list_node_t *node = list->head;
+    pm_list_node_t *next;
+
+    while (node != NULL) {
+        next = node->next;
+        xfree(node);
+        node = next;
+    }
+
+    list->size = 0;
+}
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
new file mode 100644
index 0000000000..3512dee979
--- /dev/null
+++ b/prism/util/pm_list.h
@@ -0,0 +1,97 @@
+/**
+ * @file pm_list.h
+ *
+ * An abstract linked list.
+ */
+#ifndef PRISM_LIST_H
+#define PRISM_LIST_H
+
+#include "prism/defines.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * This struct represents an abstract linked list that provides common
+ * functionality. It is meant to be used any time a linked list is necessary to
+ * store data.
+ *
+ * The linked list itself operates off a set of pointers. Because the pointers
+ * are not necessarily sequential, they can be of any size. We use this fact to
+ * allow the consumer of this linked list to extend the node struct to include
+ * any data they want. This is done by using the pm_list_node_t as the first
+ * member of the struct.
+ *
+ * For example, if we want to store a list of integers, we can do the following:
+ *
+ * ```c
+ * typedef struct {
+ *     pm_list_node_t node;
+ *     int value;
+ * } pm_int_node_t;
+ *
+ * pm_list_t list = { 0 };
+ * pm_int_node_t *node = xmalloc(sizeof(pm_int_node_t));
+ * node->value = 5;
+ *
+ * pm_list_append(&list, &node->node);
+ * ```
+ *
+ * The pm_list_t struct is used to represent the overall linked list. It
+ * contains a pointer to the head and tail of the list. This allows for easy
+ * iteration and appending of new nodes.
+ */
+typedef struct pm_list_node {
+    /** A pointer to the next node in the list. */
+    struct pm_list_node *next;
+} pm_list_node_t;
+
+/**
+ * This represents the overall linked list. It keeps a pointer to the head and
+ * tail so that iteration is easy and pushing new nodes is easy.
+ */
+typedef struct {
+    /** The size of the list. */
+    size_t size;
+
+    /** A pointer to the head of the list. */
+    pm_list_node_t *head;
+
+    /** A pointer to the tail of the list. */
+    pm_list_node_t *tail;
+} pm_list_t;
+
+/**
+ * Returns true if the given list is empty.
+ *
+ * @param list The list to check.
+ * @return True if the given list is empty, otherwise false.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
+
+/**
+ * Returns the size of the list.
+ *
+ * @param list The list to check.
+ * @return The size of the list.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
+
+/**
+ * Append a node to the given list.
+ *
+ * @param list The list to append to.
+ * @param node The node to append.
+ */
+void pm_list_append(pm_list_t *list, pm_list_node_t *node);
+
+/**
+ * Deallocate the internal state of the given list.
+ *
+ * @param list The list to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
+
+#endif
diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c
new file mode 100644
index 0000000000..7ea20ace6d
--- /dev/null
+++ b/prism/util/pm_memchr.c
@@ -0,0 +1,35 @@
+#include "prism/util/pm_memchr.h"
+
+#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+
+/**
+ * We need to roll our own memchr to handle cases where the encoding changes and
+ * we need to search for a character in a buffer that could be the trailing byte
+ * of a multibyte character.
+ */
+void *
+pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
+    if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
+        const uint8_t *source = (const uint8_t *) memory;
+        size_t index = 0;
+
+        while (index < number) {
+            if (source[index] == character) {
+                return (void *) (source + index);
+            }
+
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
+            if (width == 0) {
+                return NULL;
+            }
+
+            index += width;
+        }
+
+        return NULL;
+    } else {
+        return memchr(memory, character, number);
+    }
+}
+
+#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
new file mode 100644
index 0000000000..e0671eaed3
--- /dev/null
+++ b/prism/util/pm_memchr.h
@@ -0,0 +1,29 @@
+/**
+ * @file pm_memchr.h
+ *
+ * A custom memchr implementation.
+ */
+#ifndef PRISM_MEMCHR_H
+#define PRISM_MEMCHR_H
+
+#include "prism/defines.h"
+#include "prism/encoding.h"
+
+#include <stddef.h>
+
+/**
+ * We need to roll our own memchr to handle cases where the encoding changes and
+ * we need to search for a character in a buffer that could be the trailing byte
+ * of a multibyte character.
+ *
+ * @param source The source string.
+ * @param character The character to search for.
+ * @param number The maximum number of bytes to search.
+ * @param encoding_changed Whether the encoding changed.
+ * @param encoding A pointer to the encoding.
+ * @return A pointer to the first occurrence of the character in the source
+ *     string, or NULL if no such character exists.
+ */
+void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
+
+#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
new file mode 100644
index 0000000000..ce07ce8c8e
--- /dev/null
+++ b/prism/util/pm_newline_list.c
@@ -0,0 +1,96 @@
+#include "prism/util/pm_newline_list.h"
+
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ */
+bool
+pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
+    list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
+    if (list->offsets == NULL) return false;
+
+    list->start = start;
+
+    // This is 1 instead of 0 because we want to include the first line of the
+    // file as having offset 0, which is set because of calloc.
+    list->size = 1;
+    list->capacity = capacity;
+
+    return true;
+}
+
+/**
+ * Clear out the newlines that have been appended to the list.
+ */
+void
+pm_newline_list_clear(pm_newline_list_t *list) {
+    list->size = 1;
+}
+
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ */
+bool
+pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
+    if (list->size == list->capacity) {
+        size_t *original_offsets = list->offsets;
+
+        list->capacity = (list->capacity * 3) / 2;
+        list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
+        if (list->offsets == NULL) return false;
+
+        memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
+        xfree(original_offsets);
+    }
+
+    assert(*cursor == '\n');
+    assert(cursor >= list->start);
+    size_t newline_offset = (size_t) (cursor - list->start + 1);
+
+    assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
+    list->offsets[list->size++] = newline_offset;
+
+    return true;
+}
+
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ */
+pm_line_column_t
+pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
+    assert(cursor >= list->start);
+    size_t offset = (size_t) (cursor - list->start);
+
+    size_t left = 0;
+    size_t right = list->size - 1;
+
+    while (left <= right) {
+        size_t mid = left + (right - left) / 2;
+
+        if (list->offsets[mid] == offset) {
+            return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
+        }
+
+        if (list->offsets[mid] < offset) {
+            left = mid + 1;
+        } else {
+            right = mid - 1;
+        }
+    }
+
+    return ((pm_line_column_t) {
+        .line = ((int32_t) left) + start_line - 1,
+        .column = (uint32_t) (offset - list->offsets[left - 1])
+    });
+}
+
+/**
+ * Free the internal memory allocated for the newline list.
+ */
+void
+pm_newline_list_free(pm_newline_list_t *list) {
+    xfree(list->offsets);
+}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
new file mode 100644
index 0000000000..7ae9b6b3da
--- /dev/null
+++ b/prism/util/pm_newline_list.h
@@ -0,0 +1,102 @@
+/**
+ * @file pm_newline_list.h
+ *
+ * A list of byte offsets of newlines in a string.
+ *
+ * When compiling the syntax tree, it's necessary to know the line and column
+ * of many nodes. This is necessary to support things like error messages,
+ * tracepoints, etc.
+ *
+ * It's possible that we could store the start line, start column, end line, and
+ * end column on every node in addition to the offsets that we already store,
+ * but that would be quite a lot of memory overhead.
+ */
+#ifndef PRISM_NEWLINE_LIST_H
+#define PRISM_NEWLINE_LIST_H
+
+#include "prism/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+/**
+ * A list of offsets of newlines in a string. The offsets are assumed to be
+ * sorted/inserted in ascending order.
+ */
+typedef struct {
+    /** A pointer to the start of the source string. */
+    const uint8_t *start;
+
+    /** The number of offsets in the list. */
+    size_t size;
+
+    /** The capacity of the list that has been allocated. */
+    size_t capacity;
+
+    /** The list of offsets. */
+    size_t *offsets;
+} pm_newline_list_t;
+
+/**
+ * A line and column in a string.
+ */
+typedef struct {
+    /** The line number. */
+    int32_t line;
+
+    /** The column number. */
+    uint32_t column;
+} pm_line_column_t;
+
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ *
+ * @param list The list to initialize.
+ * @param start A pointer to the start of the source string.
+ * @param capacity The initial capacity of the list.
+ * @return True if the allocation of the offsets succeeds, otherwise false.
+ */
+bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
+
+/**
+ * Clear out the newlines that have been appended to the list.
+ *
+ * @param list The list to clear.
+ */
+void
+pm_newline_list_clear(pm_newline_list_t *list);
+
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the offset to append.
+ * @return True if the reallocation of the offsets succeeds (if one was
+ *     necessary), otherwise false.
+ */
+bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
+
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ *
+ * @param list The list to search.
+ * @param cursor A pointer to the offset to search for.
+ * @param start_line The line to start counting from.
+ * @return The line and column of the given offset.
+ */
+pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
+
+/**
+ * Free the internal memory allocated for the newline list.
+ *
+ * @param list The list to free.
+ */
+void pm_newline_list_free(pm_newline_list_t *list);
+
+#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
new file mode 100644
index 0000000000..dfc121b6a2
--- /dev/null
+++ b/prism/util/pm_string.c
@@ -0,0 +1,320 @@
+#include "prism/util/pm_string.h"
+
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_sizeof(void) {
+    return sizeof(pm_string_t);
+}
+
+/**
+ * Initialize a shared string that is based on initial input.
+ */
+void
+pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
+    assert(start <= end);
+
+    *string = (pm_string_t) {
+        .type = PM_STRING_SHARED,
+        .source = start,
+        .length = (size_t) (end - start)
+    };
+}
+
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ */
+void
+pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
+    *string = (pm_string_t) {
+        .type = PM_STRING_OWNED,
+        .source = source,
+        .length = length
+    };
+}
+
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ */
+void
+pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
+    *string = (pm_string_t) {
+        .type = PM_STRING_CONSTANT,
+        .source = (const uint8_t *) source,
+        .length = length
+    };
+}
+
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_string_mapped_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        return false;
+    }
+
+    // Get the file size.
+    DWORD file_size = GetFileSize(file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (file_size == 0) {
+        CloseHandle(file);
+        const uint8_t source[] = "";
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
+        return true;
+    }
+
+    // Create a mapping of the file.
+    HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (mapping == NULL) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // Map the file into memory.
+    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    CloseHandle(mapping);
+    CloseHandle(file);
+
+    if (source == NULL) {
+        return false;
+    }
+
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
+    return true;
+#elif defined(_POSIX_MAPPED_FILES)
+    // Open the file for reading
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        return false;
+    }
+
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        return false;
+    }
+
+    // mmap the file descriptor to virtually get the contents
+    size_t size = (size_t) sb.st_size;
+    uint8_t *source = NULL;
+
+    if (size == 0) {
+        close(fd);
+        const uint8_t source[] = "";
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
+        return true;
+    }
+
+    source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (source == MAP_FAILED) {
+        return false;
+    }
+
+    close(fd);
+    *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
+    return true;
+#else
+    (void) string;
+    (void) filepath;
+    perror("pm_string_mapped_init is not implemented for this platform");
+    return false;
+#endif
+}
+
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_string_file_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        return false;
+    }
+
+    // Get the file size.
+    DWORD file_size = GetFileSize(file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (file_size == 0) {
+        CloseHandle(file);
+        const uint8_t source[] = "";
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
+        return true;
+    }
+
+    // Create a buffer to read the file into.
+    uint8_t *source = xmalloc(file_size);
+    if (source == NULL) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // Read the contents of the file
+    DWORD bytes_read;
+    if (!ReadFile(file, source, file_size, &bytes_read, NULL)) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // Check the number of bytes read
+    if (bytes_read != file_size) {
+        xfree(source);
+        CloseHandle(file);
+        return false;
+    }
+
+    CloseHandle(file);
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
+    return true;
+#elif defined(_POSIX_MAPPED_FILES)
+    FILE *file = fopen(filepath, "rb");
+    if (file == NULL) {
+        return false;
+    }
+
+    fseek(file, 0, SEEK_END);
+    long file_size = ftell(file);
+
+    if (file_size == -1) {
+        fclose(file);
+        return false;
+    }
+
+    if (file_size == 0) {
+        fclose(file);
+        const uint8_t source[] = "";
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
+        return true;
+    }
+
+    size_t length = (size_t) file_size;
+    uint8_t *source = xmalloc(length);
+    if (source == NULL) {
+        fclose(file);
+        return false;
+    }
+
+    fseek(file, 0, SEEK_SET);
+    size_t bytes_read = fread(source, length, 1, file);
+    fclose(file);
+
+    if (bytes_read != 1) {
+        xfree(source);
+        return false;
+    }
+
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
+    return true;
+#else
+    (void) string;
+    (void) filepath;
+    perror("pm_string_file_init is not implemented for this platform");
+    return false;
+#endif
+}
+
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ */
+void
+pm_string_ensure_owned(pm_string_t *string) {
+    if (string->type == PM_STRING_OWNED) return;
+
+    size_t length = pm_string_length(string);
+    const uint8_t *source = pm_string_source(string);
+
+    uint8_t *memory = xmalloc(length);
+    if (!memory) return;
+
+    pm_string_owned_init(string, memory, length);
+    memcpy((void *) string->source, source, length);
+}
+
+/**
+ * Compare the underlying lengths and bytes of two strings. Returns 0 if the
+ * strings are equal, a negative number if the left string is less than the
+ * right string, and a positive number if the left string is greater than the
+ * right string.
+ */
+int
+pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
+    size_t left_length = pm_string_length(left);
+    size_t right_length = pm_string_length(right);
+
+    if (left_length < right_length) {
+        return -1;
+    } else if (left_length > right_length) {
+        return 1;
+    }
+
+    return memcmp(pm_string_source(left), pm_string_source(right), left_length);
+}
+
+/**
+ * Returns the length associated with the string.
+ */
+PRISM_EXPORTED_FUNCTION size_t
+pm_string_length(const pm_string_t *string) {
+    return string->length;
+}
+
+/**
+ * Returns the start pointer associated with the string.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t *
+pm_string_source(const pm_string_t *string) {
+    return string->source;
+}
+
+/**
+ * Free the associated memory of the given string.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_string_free(pm_string_t *string) {
+    void *memory = (void *) string->source;
+
+    if (string->type == PM_STRING_OWNED) {
+        xfree(memory);
+#ifdef PRISM_HAS_MMAP
+    } else if (string->type == PM_STRING_MAPPED && string->length) {
+#if defined(_WIN32)
+        UnmapViewOfFile(memory);
+#elif defined(_POSIX_MAPPED_FILES)
+        munmap(memory, string->length);
+#endif
+#endif /* PRISM_HAS_MMAP */
+    }
+}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
new file mode 100644
index 0000000000..d23792c0ba
--- /dev/null
+++ b/prism/util/pm_string.h
@@ -0,0 +1,166 @@
+/**
+ * @file pm_string.h
+ *
+ * A generic string type that can have various ownership semantics.
+ */
+#ifndef PRISM_STRING_H
+#define PRISM_STRING_H
+
+#include "prism/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// The following headers are necessary to read files using demand paging.
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(_POSIX_MAPPED_FILES)
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#endif
+
+/**
+ * A generic string type that can have various ownership semantics.
+ */
+typedef struct {
+    /** A pointer to the start of the string. */
+    const uint8_t *source;
+
+    /** The length of the string in bytes of memory. */
+    size_t length;
+
+    /** The type of the string. This field determines how the string should be freed. */
+    enum {
+        /** This string is a constant string, and should not be freed. */
+        PM_STRING_CONSTANT,
+
+        /** This is a slice of another string, and should not be freed. */
+        PM_STRING_SHARED,
+
+        /** This string owns its memory, and should be freed using `pm_string_free`. */
+        PM_STRING_OWNED,
+
+#ifdef PRISM_HAS_MMAP
+        /** This string is a memory-mapped file, and should be freed using `pm_string_free`. */
+        PM_STRING_MAPPED
+#endif
+    } type;
+} pm_string_t;
+
+/**
+ * Returns the size of the pm_string_t struct. This is necessary to allocate the
+ * correct amount of memory in the FFI backend.
+ *
+ * @return The size of the pm_string_t struct.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
+
+/**
+ * Defines an empty string. This is useful for initializing a string that will
+ * be filled in later.
+ */
+#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
+
+/**
+ * Initialize a shared string that is based on initial input.
+ *
+ * @param string The string to initialize.
+ * @param start The start of the string.
+ * @param end The end of the string.
+ */
+void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
+
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string.
+ */
+void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
+
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string.
+ */
+void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
+
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * We want to use demand paging as much as possible in order to avoid having to
+ * read the entire file into memory (which could be detrimental to performance
+ * for large files). This means that if we're on windows we'll use
+ * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+ * `mmap`, and on other POSIX systems we'll use `read`.
+ *
+ * @param string The string to initialize.
+ * @param filepath The filepath to read.
+ * @return Whether or not the file was successfully mapped.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
+
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * @param string The string to initialize.
+ * @param filepath The filepath to read.
+ * @return Whether or not the file was successfully read.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_string_file_init(pm_string_t *string, const char *filepath);
+
+/**
+ * Ensure the string is owned. If it is not, then reinitialize it as owned and
+ * copy over the previous source.
+ *
+ * @param string The string to ensure is owned.
+ */
+void pm_string_ensure_owned(pm_string_t *string);
+
+/**
+ * Compare the underlying lengths and bytes of two strings. Returns 0 if the
+ * strings are equal, a negative number if the left string is less than the
+ * right string, and a positive number if the left string is greater than the
+ * right string.
+ *
+ * @param left The left string to compare.
+ * @param right The right string to compare.
+ * @return The comparison result.
+ */
+int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
+
+/**
+ * Returns the length associated with the string.
+ *
+ * @param string The string to get the length of.
+ * @return The length of the string.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
+
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * @param string The string to get the start pointer of.
+ * @return The start pointer of the string.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
+
+/**
+ * Free the associated memory of the given string.
+ *
+ * @param string The string to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c
new file mode 100644
index 0000000000..2240bf8110
--- /dev/null
+++ b/prism/util/pm_strncasecmp.c
@@ -0,0 +1,24 @@
+#include "prism/util/pm_strncasecmp.h"
+
+/**
+ * Compare two strings, ignoring case, up to the given length. Returns 0 if the
+ * strings are equal, a negative number if string1 is less than string2, or a
+ * positive number if string1 is greater than string2.
+ *
+ * Note that this is effectively our own implementation of strncasecmp, but it's
+ * not available on all of the platforms we want to support so we're rolling it
+ * here.
+ */
+int
+pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
+    size_t offset = 0;
+    int difference = 0;
+
+    while (offset < length && string1[offset] != '\0') {
+        if (string2[offset] == '\0') return string1[offset];
+        if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
+        offset++;
+    }
+
+    return difference;
+}
diff --git a/prism/util/pm_strncasecmp.h b/prism/util/pm_strncasecmp.h
new file mode 100644
index 0000000000..5cb88cb5eb
--- /dev/null
+++ b/prism/util/pm_strncasecmp.h
@@ -0,0 +1,32 @@
+/**
+ * @file pm_strncasecmp.h
+ *
+ * A custom strncasecmp implementation.
+ */
+#ifndef PRISM_STRNCASECMP_H
+#define PRISM_STRNCASECMP_H
+
+#include "prism/defines.h"
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Compare two strings, ignoring case, up to the given length. Returns 0 if the
+ * strings are equal, a negative number if string1 is less than string2, or a
+ * positive number if string1 is greater than string2.
+ *
+ * Note that this is effectively our own implementation of strncasecmp, but it's
+ * not available on all of the platforms we want to support so we're rolling it
+ * here.
+ *
+ * @param string1 The first string to compare.
+ * @param string2 The second string to compare
+ * @param length The maximum number of characters to compare.
+ * @return 0 if the strings are equal, a negative number if string1 is less than
+ *     string2, or a positive number if string1 is greater than string2.
+ */
+int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
+
+#endif
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
new file mode 100644
index 0000000000..916a4cc3fd
--- /dev/null
+++ b/prism/util/pm_strpbrk.c
@@ -0,0 +1,206 @@
+#include "prism/util/pm_strpbrk.h"
+
+/**
+ * Add an invalid multibyte character error to the parser.
+ */
+static inline void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
+}
+
+/**
+ * Set the explicit encoding for the parser to the current encoding.
+ */
+static inline void
+pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
+    if (parser->explicit_encoding != NULL) {
+        if (parser->explicit_encoding == parser->encoding) {
+            // Okay, we already locked to this encoding.
+        } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            // Not okay, we already found a Unicode escape sequence and this
+            // conflicts.
+            pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+        } else {
+            // Should not be anything else.
+            assert(false && "unreachable");
+        }
+    }
+
+    parser->explicit_encoding = parser->encoding;
+}
+
+/**
+ * This is the default path.
+ */
+static inline const uint8_t *
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the path when the encoding is ASCII-8BIT.
+ */
+static inline const uint8_t *
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
+        index++;
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the slow path that does care about the encoding.
+ */
+static inline const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the fast path that does not care about the encoding because we know
+ * the encoding only supports single-byte characters.
+ */
+static inline const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
+    size_t index = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80 || !validate) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+            pm_strpbrk_explicit_encoding_set(parser, source, width);
+
+            if (width > 0) {
+                index += width;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ */
+const uint8_t *
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
+    if (length <= 0) {
+        return NULL;
+    } else if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
+    } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+        return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
+    } else if (parser->encoding->multibyte) {
+        return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
+    } else {
+        return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
+    }
+}
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
new file mode 100644
index 0000000000..f387bd5782
--- /dev/null
+++ b/prism/util/pm_strpbrk.h
@@ -0,0 +1,46 @@
+/**
+ * @file pm_strpbrk.h
+ *
+ * A custom strpbrk implementation.
+ */
+#ifndef PRISM_STRPBRK_H
+#define PRISM_STRPBRK_H
+
+#include "prism/defines.h"
+#include "prism/diagnostic.h"
+#include "prism/parser.h"
+
+#include <stddef.h>
+#include <string.h>
+
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ *
+ * @param parser The parser.
+ * @param source The source to search.
+ * @param charset The charset to search for.
+ * @param length The maximum number of bytes to search.
+ * @param validate Whether to validate that the source string is valid in the
+ *     current encoding of the parser.
+ * @return A pointer to the first character in the source string that is in the
+ *     charset, or NULL if no such character exists.
+ */
+const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
+
+#endif