summaryrefslogtreecommitdiff
path: root/prism/util
diff options
context:
space:
mode:
Diffstat (limited to 'prism/util')
-rw-r--r--prism/util/pm_buffer.c357
-rw-r--r--prism/util/pm_buffer.h228
-rw-r--r--prism/util/pm_char.c318
-rw-r--r--prism/util/pm_char.h204
-rw-r--r--prism/util/pm_constant_pool.c342
-rw-r--r--prism/util/pm_constant_pool.h218
-rw-r--r--prism/util/pm_integer.c670
-rw-r--r--prism/util/pm_integer.h126
-rw-r--r--prism/util/pm_list.c49
-rw-r--r--prism/util/pm_list.h97
-rw-r--r--prism/util/pm_memchr.c35
-rw-r--r--prism/util/pm_memchr.h29
-rw-r--r--prism/util/pm_newline_list.c125
-rw-r--r--prism/util/pm_newline_list.h113
-rw-r--r--prism/util/pm_string.c383
-rw-r--r--prism/util/pm_string.h190
-rw-r--r--prism/util/pm_strncasecmp.c36
-rw-r--r--prism/util/pm_strncasecmp.h32
-rw-r--r--prism/util/pm_strpbrk.c206
-rw-r--r--prism/util/pm_strpbrk.h46
20 files changed, 0 insertions, 3804 deletions
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
deleted file mode 100644
index 2136a7c43e..0000000000
--- a/prism/util/pm_buffer.c
+++ /dev/null
@@ -1,357 +0,0 @@
-#include "prism/util/pm_buffer.h"
-
-/**
- * Return the size of the pm_buffer_t struct.
- */
-size_t
-pm_buffer_sizeof(void) {
- return sizeof(pm_buffer_t);
-}
-
-/**
- * Initialize a pm_buffer_t with the given capacity.
- */
-bool
-pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
- buffer->length = 0;
- buffer->capacity = capacity;
-
- buffer->value = (char *) xmalloc(capacity);
- return buffer->value != NULL;
-}
-
-/**
- * Initialize a pm_buffer_t with its default values.
- */
-bool
-pm_buffer_init(pm_buffer_t *buffer) {
- return pm_buffer_init_capacity(buffer, 1024);
-}
-
-/**
- * Return the value of the buffer.
- */
-char *
-pm_buffer_value(const pm_buffer_t *buffer) {
- return buffer->value;
-}
-
-/**
- * Return the length of the buffer.
- */
-size_t
-pm_buffer_length(const pm_buffer_t *buffer) {
- return buffer->length;
-}
-
-/**
- * Append the given amount of space to the buffer.
- */
-static inline bool
-pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
- size_t next_length = buffer->length + length;
-
- if (next_length > buffer->capacity) {
- if (buffer->capacity == 0) {
- buffer->capacity = 1;
- }
-
- while (next_length > buffer->capacity) {
- buffer->capacity *= 2;
- }
-
- buffer->value = xrealloc(buffer->value, buffer->capacity);
- if (buffer->value == NULL) return false;
- }
-
- buffer->length = next_length;
- return true;
-}
-
-/**
- * Append a generic pointer to memory to the buffer.
- */
-static inline void
-pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
- size_t cursor = buffer->length;
- if (pm_buffer_append_length(buffer, length)) {
- memcpy(buffer->value + cursor, source, length);
- }
-}
-
-/**
- * Append the given amount of space as zeroes to the buffer.
- */
-void
-pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) {
- size_t cursor = buffer->length;
- if (pm_buffer_append_length(buffer, length)) {
- memset(buffer->value + cursor, 0, length);
- }
-}
-
-/**
- * Append a formatted string to the buffer.
- */
-void
-pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) {
- va_list arguments;
- va_start(arguments, format);
- int result = vsnprintf(NULL, 0, format, arguments);
- va_end(arguments);
-
- if (result < 0) return;
- size_t length = (size_t) (result + 1);
-
- size_t cursor = buffer->length;
- if (pm_buffer_append_length(buffer, length)) {
- va_start(arguments, format);
- vsnprintf(buffer->value + cursor, length, format, arguments);
- va_end(arguments);
- buffer->length--;
- }
-}
-
-/**
- * Append a string to the buffer.
- */
-void
-pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length) {
- pm_buffer_append(buffer, value, length);
-}
-
-/**
- * Append a list of bytes to the buffer.
- */
-void
-pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) {
- pm_buffer_append(buffer, (const char *) value, length);
-}
-
-/**
- * Append a single byte to the buffer.
- */
-void
-pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) {
- const void *source = &value;
- pm_buffer_append(buffer, source, sizeof(uint8_t));
-}
-
-/**
- * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
- */
-void
-pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
- if (value < 128) {
- pm_buffer_append_byte(buffer, (uint8_t) value);
- } else {
- uint32_t n = value;
- while (n >= 128) {
- pm_buffer_append_byte(buffer, (uint8_t) (n | 128));
- n >>= 7;
- }
- pm_buffer_append_byte(buffer, (uint8_t) n);
- }
-}
-
-/**
- * Append a 32-bit signed integer to the buffer as a variable-length integer.
- */
-void
-pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
- uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31));
- pm_buffer_append_varuint(buffer, unsigned_int);
-}
-
-/**
- * Append a double to the buffer.
- */
-void
-pm_buffer_append_double(pm_buffer_t *buffer, double value) {
- const void *source = &value;
- pm_buffer_append(buffer, source, sizeof(double));
-}
-
-/**
- * Append a unicode codepoint to the buffer.
- */
-bool
-pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value) {
- if (value <= 0x7F) {
- pm_buffer_append_byte(buffer, (uint8_t) value); // 0xxxxxxx
- return true;
- } else if (value <= 0x7FF) {
- uint8_t bytes[] = {
- (uint8_t) (0xC0 | ((value >> 6) & 0x3F)), // 110xxxxx
- (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
- };
-
- pm_buffer_append_bytes(buffer, bytes, 2);
- return true;
- } else if (value <= 0xFFFF) {
- uint8_t bytes[] = {
- (uint8_t) (0xE0 | ((value >> 12) & 0x3F)), // 1110xxxx
- (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
- (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
- };
-
- pm_buffer_append_bytes(buffer, bytes, 3);
- return true;
- } else if (value <= 0x10FFFF) {
- uint8_t bytes[] = {
- (uint8_t) (0xF0 | ((value >> 18) & 0x3F)), // 11110xxx
- (uint8_t) (0x80 | ((value >> 12) & 0x3F)), // 10xxxxxx
- (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx
- (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx
- };
-
- pm_buffer_append_bytes(buffer, bytes, 4);
- return true;
- } else {
- return false;
- }
-}
-
-/**
- * Append a slice of source code to the buffer.
- */
-void
-pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) {
- for (size_t index = 0; index < length; index++) {
- const uint8_t byte = source[index];
-
- if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) {
- if (escaping == PM_BUFFER_ESCAPING_RUBY) {
- pm_buffer_append_format(buffer, "\\x%02X", byte);
- } else {
- pm_buffer_append_format(buffer, "\\u%04X", byte);
- }
- } else {
- switch (byte) {
- case '\a':
- if (escaping == PM_BUFFER_ESCAPING_RUBY) {
- pm_buffer_append_string(buffer, "\\a", 2);
- } else {
- pm_buffer_append_format(buffer, "\\u%04X", byte);
- }
- break;
- case '\b':
- pm_buffer_append_string(buffer, "\\b", 2);
- break;
- case '\t':
- pm_buffer_append_string(buffer, "\\t", 2);
- break;
- case '\n':
- pm_buffer_append_string(buffer, "\\n", 2);
- break;
- case '\v':
- if (escaping == PM_BUFFER_ESCAPING_RUBY) {
- pm_buffer_append_string(buffer, "\\v", 2);
- } else {
- pm_buffer_append_format(buffer, "\\u%04X", byte);
- }
- break;
- case '\f':
- pm_buffer_append_string(buffer, "\\f", 2);
- break;
- case '\r':
- pm_buffer_append_string(buffer, "\\r", 2);
- break;
- case '"':
- pm_buffer_append_string(buffer, "\\\"", 2);
- break;
- case '#': {
- if (escaping == PM_BUFFER_ESCAPING_RUBY && index + 1 < length) {
- const uint8_t next_byte = source[index + 1];
- if (next_byte == '{' || next_byte == '@' || next_byte == '$') {
- pm_buffer_append_byte(buffer, '\\');
- }
- }
-
- pm_buffer_append_byte(buffer, '#');
- break;
- }
- case '\\':
- pm_buffer_append_string(buffer, "\\\\", 2);
- break;
- default:
- pm_buffer_append_byte(buffer, byte);
- break;
- }
- }
- }
-}
-
-/**
- * Prepend the given string to the buffer.
- */
-void
-pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
- size_t cursor = buffer->length;
- if (pm_buffer_append_length(buffer, length)) {
- memmove(buffer->value + length, buffer->value, cursor);
- memcpy(buffer->value, value, length);
- }
-}
-
-/**
- * Concatenate one buffer onto another.
- */
-void
-pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source) {
- if (source->length > 0) {
- pm_buffer_append(destination, source->value, source->length);
- }
-}
-
-/**
- * Clear the buffer by reducing its size to 0. This does not free the allocated
- * memory, but it does allow the buffer to be reused.
- */
-void
-pm_buffer_clear(pm_buffer_t *buffer) {
- buffer->length = 0;
-}
-
-/**
- * Strip the whitespace from the end of the buffer.
- */
-void
-pm_buffer_rstrip(pm_buffer_t *buffer) {
- while (buffer->length > 0 && pm_char_is_whitespace((uint8_t) buffer->value[buffer->length - 1])) {
- buffer->length--;
- }
-}
-
-/**
- * Checks if the buffer includes the given value.
- */
-size_t
-pm_buffer_index(const pm_buffer_t *buffer, char value) {
- const char *first = memchr(buffer->value, value, buffer->length);
- return (first == NULL) ? SIZE_MAX : (size_t) (first - buffer->value);
-}
-
-/**
- * Insert the given string into the buffer at the given index.
- */
-void
-pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) {
- assert(index <= buffer->length);
-
- if (index == buffer->length) {
- pm_buffer_append_string(buffer, value, length);
- } else {
- pm_buffer_append_zeroes(buffer, length);
- memmove(buffer->value + index + length, buffer->value + index, buffer->length - length - index);
- memcpy(buffer->value + index, value, length);
- }
-}
-
-/**
- * Free the memory associated with the buffer.
- */
-void
-pm_buffer_free(pm_buffer_t *buffer) {
- xfree(buffer->value);
-}
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
deleted file mode 100644
index f3c20ab2a5..0000000000
--- a/prism/util/pm_buffer.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/**
- * @file pm_buffer.h
- *
- * A wrapper around a contiguous block of allocated memory.
- */
-#ifndef PRISM_BUFFER_H
-#define PRISM_BUFFER_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_char.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * A pm_buffer_t is a simple memory buffer that stores data in a contiguous
- * block of memory.
- */
-typedef struct {
- /** The length of the buffer in bytes. */
- size_t length;
-
- /** The capacity of the buffer in bytes that has been allocated. */
- size_t capacity;
-
- /** A pointer to the start of the buffer. */
- char *value;
-} pm_buffer_t;
-
-/**
- * Return the size of the pm_buffer_t struct.
- *
- * @returns The size of the pm_buffer_t struct.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
-
-/**
- * Initialize a pm_buffer_t with the given capacity.
- *
- * @param buffer The buffer to initialize.
- * @param capacity The capacity of the buffer.
- * @returns True if the buffer was initialized successfully, false otherwise.
- */
-bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
-
-/**
- * Initialize a pm_buffer_t with its default values.
- *
- * @param buffer The buffer to initialize.
- * @returns True if the buffer was initialized successfully, false otherwise.
- */
-PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
-
-/**
- * Return the value of the buffer.
- *
- * @param buffer The buffer to get the value of.
- * @returns The value of the buffer.
- */
-PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer);
-
-/**
- * Return the length of the buffer.
- *
- * @param buffer The buffer to get the length of.
- * @returns The length of the buffer.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer);
-
-/**
- * Append the given amount of space as zeroes to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param length The amount of space to append and zero.
- */
-void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
-
-/**
- * Append a formatted string to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param format The format string to append.
- * @param ... The arguments to the format string.
- */
-void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
-
-/**
- * Append a string to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The string to append.
- * @param length The length of the string to append.
- */
-void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
-
-/**
- * Append a list of bytes to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The bytes to append.
- * @param length The length of the bytes to append.
- */
-void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
-
-/**
- * Append a single byte to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The byte to append.
- */
-void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
-
-/**
- * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
- *
- * @param buffer The buffer to append to.
- * @param value The integer to append.
- */
-void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
-
-/**
- * Append a 32-bit signed integer to the buffer as a variable-length integer.
- *
- * @param buffer The buffer to append to.
- * @param value The integer to append.
- */
-void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
-
-/**
- * Append a double to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The double to append.
- */
-void pm_buffer_append_double(pm_buffer_t *buffer, double value);
-
-/**
- * Append a unicode codepoint to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The character to append.
- * @returns True if the codepoint was valid and appended successfully, false
- * otherwise.
- */
-bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
-
-/**
- * The different types of escaping that can be performed by the buffer when
- * appending a slice of Ruby source code.
- */
-typedef enum {
- PM_BUFFER_ESCAPING_RUBY,
- PM_BUFFER_ESCAPING_JSON
-} pm_buffer_escaping_t;
-
-/**
- * Append a slice of source code to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param source The source code to append.
- * @param length The length of the source code to append.
- * @param escaping The type of escaping to perform.
- */
-void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
-
-/**
- * Prepend the given string to the buffer.
- *
- * @param buffer The buffer to prepend to.
- * @param value The string to prepend.
- * @param length The length of the string to prepend.
- */
-void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
-
-/**
- * Concatenate one buffer onto another.
- *
- * @param destination The buffer to concatenate onto.
- * @param source The buffer to concatenate.
- */
-void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
-
-/**
- * Clear the buffer by reducing its size to 0. This does not free the allocated
- * memory, but it does allow the buffer to be reused.
- *
- * @param buffer The buffer to clear.
- */
-void pm_buffer_clear(pm_buffer_t *buffer);
-
-/**
- * Strip the whitespace from the end of the buffer.
- *
- * @param buffer The buffer to strip.
- */
-void pm_buffer_rstrip(pm_buffer_t *buffer);
-
-/**
- * Checks if the buffer includes the given value.
- *
- * @param buffer The buffer to check.
- * @param value The value to check for.
- * @returns The index of the first occurrence of the value in the buffer, or
- * SIZE_MAX if the value is not found.
- */
-size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
-
-/**
- * Insert the given string into the buffer at the given index.
- *
- * @param buffer The buffer to insert into.
- * @param index The index to insert at.
- * @param value The string to insert.
- * @param length The length of the string to insert.
- */
-void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
-
-/**
- * Free the memory associated with the buffer.
- *
- * @param buffer The buffer to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
-
-#endif
diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c
deleted file mode 100644
index a51dc11645..0000000000
--- a/prism/util/pm_char.c
+++ /dev/null
@@ -1,318 +0,0 @@
-#include "prism/util/pm_char.h"
-
-#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
-#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
-#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
-
-#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
-#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
-#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
-#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
-#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
-#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
-#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
-#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
-
-static const uint8_t pm_byte_table[256] = {
-// 0 1 2 3 4 5 6 7 8 9 A B C D E F
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
- 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
- 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x
- 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
-};
-
-static const uint8_t pm_number_table[256] = {
- // 0 1 2 3 4 5 6 7 8 9 A B C D E F
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
- 0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
- 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
- 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
-};
-
-/**
- * Returns the number of characters at the start of the string that match the
- * given kind. Disallows searching past the given maximum number of characters.
- */
-static inline size_t
-pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
- if (length <= 0) return 0;
-
- size_t size = 0;
- size_t maximum = (size_t) length;
-
- while (size < maximum && (pm_byte_table[string[size]] & kind)) size++;
- return size;
-}
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
- return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE);
-}
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace while also tracking the location of each newline. Disallows
- * searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
- if (length <= 0) return 0;
-
- size_t size = 0;
- size_t maximum = (size_t) length;
-
- while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
- if (string[size] == '\n') {
- pm_newline_list_append(newline_list, string + size);
- }
-
- size++;
- }
-
- return size;
-}
-
-/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
- return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
-
-/**
- * Returns the number of characters at the start of the string that are regexp
- * options. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
- return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
-}
-
-/**
- * Returns true if the given character matches the given kind.
- */
-static inline bool
-pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
- return (pm_byte_table[b] & kind) != 0;
-}
-
-/**
- * Returns true if the given character is a whitespace character.
- */
-bool
-pm_char_is_whitespace(const uint8_t b) {
- return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
-}
-
-/**
- * Returns true if the given character is an inline whitespace character.
- */
-bool
-pm_char_is_inline_whitespace(const uint8_t b) {
- return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
-
-/**
- * Scan through the string and return the number of characters at the start of
- * the string that match the given kind. Disallows searching past the given
- * maximum number of characters.
- */
-static inline size_t
-pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
- if (length <= 0) return 0;
-
- size_t size = 0;
- size_t maximum = (size_t) length;
-
- while (size < maximum && (pm_number_table[string[size]] & kind)) size++;
- return size;
-}
-
-/**
- * Scan through the string and return the number of characters at the start of
- * the string that match the given kind. Disallows searching past the given
- * maximum number of characters.
- *
- * Additionally, report the location of the last invalid underscore character
- * found in the string through the out invalid parameter.
- */
-static inline size_t
-pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
- if (length <= 0) return 0;
-
- size_t size = 0;
- size_t maximum = (size_t) length;
-
- bool underscore = false;
- while (size < maximum && (pm_number_table[string[size]] & kind)) {
- if (string[size] == '_') {
- if (underscore) *invalid = string + size;
- underscore = true;
- } else {
- underscore = false;
- }
-
- size++;
- }
-
- if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
- return size;
-}
-
-/**
- * Returns the number of characters at the start of the string that are binary
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- */
-size_t
-pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
- return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER);
-}
-
-/**
- * Returns the number of characters at the start of the string that are octal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- */
-size_t
-pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
- return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER);
-}
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
- return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
-}
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore
- */
-size_t
-pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
- return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER);
-}
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits. Disallows searching past the given maximum number of
- * characters.
- */
-size_t
-pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
- return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
-}
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits or underscores. Disallows searching past the given maximum
- * number of characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- */
-size_t
-pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
- return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER);
-}
-
-/**
- * Returns true if the given character matches the given kind.
- */
-static inline bool
-pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
- return (pm_number_table[b] & kind) != 0;
-}
-
-/**
- * Returns true if the given character is a binary digit.
- */
-bool
-pm_char_is_binary_digit(const uint8_t b) {
- return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT);
-}
-
-/**
- * Returns true if the given character is an octal digit.
- */
-bool
-pm_char_is_octal_digit(const uint8_t b) {
- return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT);
-}
-
-/**
- * Returns true if the given character is a decimal digit.
- */
-bool
-pm_char_is_decimal_digit(const uint8_t b) {
- return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT);
-}
-
-/**
- * Returns true if the given character is a hexadecimal digit.
- */
-bool
-pm_char_is_hexadecimal_digit(const uint8_t b) {
- return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
-}
-
-#undef PRISM_CHAR_BIT_WHITESPACE
-#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
-#undef PRISM_CHAR_BIT_REGEXP_OPTION
-
-#undef PRISM_NUMBER_BIT_BINARY_DIGIT
-#undef PRISM_NUMBER_BIT_BINARY_NUMBER
-#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
-#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
-#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
-#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
-#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
-#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
deleted file mode 100644
index deeafd6321..0000000000
--- a/prism/util/pm_char.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/**
- * @file pm_char.h
- *
- * Functions for working with characters and strings.
- */
-#ifndef PRISM_CHAR_H
-#define PRISM_CHAR_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_newline_list.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are
- * whitespace.
- */
-size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace while also tracking the location of each newline. Disallows
- * searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param newline_list The list of newlines to populate.
- * @return The number of characters at the start of the string that are
- * whitespace.
- */
-size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
-
-/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are inline
- * whitespace.
- */
-size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are decimal
- * digits.
- */
-size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits. Disallows searching past the given maximum number of
- * characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are
- * hexadecimal digits.
- */
-size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are octal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are octal
- * digits or underscores.
- */
-size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are decimal
- * digits or underscores.
- */
-size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits or underscores. Disallows searching past the given maximum
- * number of characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are
- * hexadecimal digits or underscores.
- */
-size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are regexp
- * options. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are regexp
- * options.
- */
-size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are binary
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are binary
- * digits or underscores.
- */
-size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns true if the given character is a whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is a whitespace character.
- */
-bool pm_char_is_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is an inline whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is an inline whitespace character.
- */
-bool pm_char_is_inline_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is a binary digit.
- *
- * @param b The character to check.
- * @return True if the given character is a binary digit.
- */
-bool pm_char_is_binary_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is an octal digit.
- *
- * @param b The character to check.
- * @return True if the given character is an octal digit.
- */
-bool pm_char_is_octal_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is a decimal digit.
- *
- * @param b The character to check.
- * @return True if the given character is a decimal digit.
- */
-bool pm_char_is_decimal_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is a hexadecimal digit.
- *
- * @param b The character to check.
- * @return True if the given character is a hexadecimal digit.
- */
-bool pm_char_is_hexadecimal_digit(const uint8_t b);
-
-#endif
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
deleted file mode 100644
index 38ea01a228..0000000000
--- a/prism/util/pm_constant_pool.c
+++ /dev/null
@@ -1,342 +0,0 @@
-#include "prism/util/pm_constant_pool.h"
-
-/**
- * Initialize a list of constant ids.
- */
-void
-pm_constant_id_list_init(pm_constant_id_list_t *list) {
- list->ids = NULL;
- list->size = 0;
- list->capacity = 0;
-}
-
-/**
- * Initialize a list of constant ids with a given capacity.
- */
-void
-pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
- if (capacity) {
- list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
- if (list->ids == NULL) abort();
- } else {
- list->ids = NULL;
- }
-
- list->size = 0;
- list->capacity = capacity;
-}
-
-/**
- * Append a constant id to a list of constant ids. Returns false if any
- * potential reallocations fail.
- */
-bool
-pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
- if (list->size >= list->capacity) {
- list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
- list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
- if (list->ids == NULL) return false;
- }
-
- list->ids[list->size++] = id;
- return true;
-}
-
-/**
- * Insert a constant id into a list of constant ids at the specified index.
- */
-void
-pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
- assert(index < list->capacity);
- assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
-
- list->ids[index] = id;
- list->size++;
-}
-
-/**
- * Checks if the current constant id list includes the given constant id.
- */
-bool
-pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
- for (size_t index = 0; index < list->size; index++) {
- if (list->ids[index] == id) return true;
- }
- return false;
-}
-
-/**
- * Free the memory associated with a list of constant ids.
- */
-void
-pm_constant_id_list_free(pm_constant_id_list_t *list) {
- if (list->ids != NULL) {
- xfree(list->ids);
- }
-}
-
-/**
- * A relatively simple hash function (djb2) that is used to hash strings. We are
- * optimizing here for simplicity and speed.
- */
-static inline uint32_t
-pm_constant_pool_hash(const uint8_t *start, size_t length) {
- // This is a prime number used as the initial value for the hash function.
- uint32_t value = 5381;
-
- for (size_t index = 0; index < length; index++) {
- value = ((value << 5) + value) + start[index];
- }
-
- return value;
-}
-
-/**
- * https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
- */
-static uint32_t
-next_power_of_two(uint32_t v) {
- // Avoid underflow in subtraction on next line.
- if (v == 0) {
- // 1 is the nearest power of 2 to 0 (2^0)
- return 1;
- }
- v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v++;
- return v;
-}
-
-#ifndef NDEBUG
-static bool
-is_power_of_two(uint32_t size) {
- return (size & (size - 1)) == 0;
-}
-#endif
-
-/**
- * Resize a constant pool to a given capacity.
- */
-static inline bool
-pm_constant_pool_resize(pm_constant_pool_t *pool) {
- assert(is_power_of_two(pool->capacity));
-
- uint32_t next_capacity = pool->capacity * 2;
- if (next_capacity < pool->capacity) return false;
-
- const uint32_t mask = next_capacity - 1;
- const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
-
- void *next = xcalloc(next_capacity, element_size);
- if (next == NULL) return false;
-
- pm_constant_pool_bucket_t *next_buckets = next;
- pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
-
- // For each bucket in the current constant pool, find the index in the
- // next constant pool, and insert it.
- for (uint32_t index = 0; index < pool->capacity; index++) {
- pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
-
- // If an id is set on this constant, then we know we have content here.
- // In this case we need to insert it into the next constant pool.
- if (bucket->id != PM_CONSTANT_ID_UNSET) {
- uint32_t next_index = bucket->hash & mask;
-
- // This implements linear scanning to find the next available slot
- // in case this index is already taken. We don't need to bother
- // comparing the values since we know that the hash is unique.
- while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
- next_index = (next_index + 1) & mask;
- }
-
- // Here we copy over the entire bucket, which includes the id so
- // that they are consistent between resizes.
- next_buckets[next_index] = *bucket;
- }
- }
-
- // The constants are stable with respect to hash table resizes.
- memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
-
- // pool->constants and pool->buckets are allocated out of the same chunk
- // of memory, with the buckets coming first.
- xfree(pool->buckets);
- pool->constants = next_constants;
- pool->buckets = next_buckets;
- pool->capacity = next_capacity;
- return true;
-}
-
-/**
- * Initialize a new constant pool with a given capacity.
- */
-bool
-pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
- const uint32_t maximum = (~((uint32_t) 0));
- if (capacity >= ((maximum / 2) + 1)) return false;
-
- capacity = next_power_of_two(capacity);
- const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
- void *memory = xcalloc(capacity, element_size);
- if (memory == NULL) return false;
-
- pool->buckets = memory;
- pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
- pool->size = 0;
- pool->capacity = capacity;
- return true;
-}
-
-/**
- * Return a pointer to the constant indicated by the given constant id.
- */
-pm_constant_t *
-pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
- assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
- return &pool->constants[constant_id - 1];
-}
-
-/**
- * Find a constant in a constant pool. Returns the id of the constant, or 0 if
- * the constant is not found.
- */
-pm_constant_id_t
-pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
- assert(is_power_of_two(pool->capacity));
- const uint32_t mask = pool->capacity - 1;
-
- uint32_t hash = pm_constant_pool_hash(start, length);
- uint32_t index = hash & mask;
- pm_constant_pool_bucket_t *bucket;
-
- while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
- pm_constant_t *constant = &pool->constants[bucket->id - 1];
- if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
- return bucket->id;
- }
-
- index = (index + 1) & mask;
- }
-
- return PM_CONSTANT_ID_UNSET;
-}
-
-/**
- * Insert a constant into a constant pool and return its index in the pool.
- */
-static inline pm_constant_id_t
-pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
- if (pool->size >= (pool->capacity / 4 * 3)) {
- if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
- }
-
- assert(is_power_of_two(pool->capacity));
- const uint32_t mask = pool->capacity - 1;
-
- uint32_t hash = pm_constant_pool_hash(start, length);
- uint32_t index = hash & mask;
- pm_constant_pool_bucket_t *bucket;
-
- while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
- // If there is a collision, then we need to check if the content is the
- // same as the content we are trying to insert. If it is, then we can
- // return the id of the existing constant.
- pm_constant_t *constant = &pool->constants[bucket->id - 1];
-
- if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
- // Since we have found a match, we need to check if this is
- // attempting to insert a shared or an owned constant. We want to
- // prefer shared constants since they don't require allocations.
- if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
- // If we're attempting to insert an owned constant and we have
- // an existing constant, then either way we don't want the given
- // memory. Either it's duplicated with the existing constant or
- // it's not necessary because we have a shared version.
- xfree((void *) start);
- } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
- // If we're attempting to insert a shared constant and the
- // existing constant is owned, then we can free the owned
- // constant and replace it with the shared constant.
- xfree((void *) constant->start);
- constant->start = start;
- bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
- }
-
- return bucket->id;
- }
-
- index = (index + 1) & mask;
- }
-
- // IDs are allocated starting at 1, since the value 0 denotes a non-existent
- // constant.
- uint32_t id = ++pool->size;
- assert(pool->size < ((uint32_t) (1 << 30)));
-
- *bucket = (pm_constant_pool_bucket_t) {
- .id = (unsigned int) (id & 0x3fffffff),
- .type = (unsigned int) (type & 0x3),
- .hash = hash
- };
-
- pool->constants[id - 1] = (pm_constant_t) {
- .start = start,
- .length = length,
- };
-
- return id;
-}
-
-/**
- * Insert a constant into a constant pool. Returns the id of the constant, or
- * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
- */
-pm_constant_id_t
-pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
- return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
-}
-
-/**
- * Insert a constant into a constant pool from memory that is now owned by the
- * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
- * potential calls to resize fail.
- */
-pm_constant_id_t
-pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
- return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
-}
-
-/**
- * Insert a constant into a constant pool from memory that is constant. Returns
- * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
- * resize fail.
- */
-pm_constant_id_t
-pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
- return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
-}
-
-/**
- * Free the memory associated with a constant pool.
- */
-void
-pm_constant_pool_free(pm_constant_pool_t *pool) {
- // For each constant in the current constant pool, free the contents if the
- // contents are owned.
- for (uint32_t index = 0; index < pool->capacity; index++) {
- pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
-
- // If an id is set on this constant, then we know we have content here.
- if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
- pm_constant_t *constant = &pool->constants[bucket->id - 1];
- xfree((void *) constant->start);
- }
- }
-
- xfree(pool->buckets);
-}
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
deleted file mode 100644
index 6df23f8f50..0000000000
--- a/prism/util/pm_constant_pool.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * @file pm_constant_pool.h
- *
- * A data structure that stores a set of strings.
- *
- * Each string is assigned a unique id, which can be used to compare strings for
- * equality. This comparison ends up being much faster than strcmp, since it
- * only requires a single integer comparison.
- */
-#ifndef PRISM_CONSTANT_POOL_H
-#define PRISM_CONSTANT_POOL_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * When we allocate constants into the pool, we reserve 0 to mean that the slot
- * is not yet filled. This constant is reused in other places to indicate the
- * lack of a constant id.
- */
-#define PM_CONSTANT_ID_UNSET 0
-
-/**
- * A constant id is a unique identifier for a constant in the constant pool.
- */
-typedef uint32_t pm_constant_id_t;
-
-/**
- * A list of constant IDs. Usually used to represent a set of locals.
- */
-typedef struct {
- /** The number of constant ids in the list. */
- size_t size;
-
- /** The number of constant ids that have been allocated in the list. */
- size_t capacity;
-
- /** The constant ids in the list. */
- pm_constant_id_t *ids;
-} pm_constant_id_list_t;
-
-/**
- * Initialize a list of constant ids.
- *
- * @param list The list to initialize.
- */
-void pm_constant_id_list_init(pm_constant_id_list_t *list);
-
-/**
- * Initialize a list of constant ids with a given capacity.
- *
- * @param list The list to initialize.
- * @param capacity The initial capacity of the list.
- */
-void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
-
-/**
- * Append a constant id to a list of constant ids. Returns false if any
- * potential reallocations fail.
- *
- * @param list The list to append to.
- * @param id The id to append.
- * @return Whether the append succeeded.
- */
-bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
-
-/**
- * Insert a constant id into a list of constant ids at the specified index.
- *
- * @param list The list to insert into.
- * @param index The index at which to insert.
- * @param id The id to insert.
- */
-void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
-
-/**
- * Checks if the current constant id list includes the given constant id.
- *
- * @param list The list to check.
- * @param id The id to check for.
- * @return Whether the list includes the given id.
- */
-bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
-
-/**
- * Free the memory associated with a list of constant ids.
- *
- * @param list The list to free.
- */
-void pm_constant_id_list_free(pm_constant_id_list_t *list);
-
-/**
- * The type of bucket in the constant pool hash map. This determines how the
- * bucket should be freed.
- */
-typedef unsigned int pm_constant_pool_bucket_type_t;
-
-/** By default, each constant is a slice of the source. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
-
-/** An owned constant is one for which memory has been allocated. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
-
-/** A constant constant is known at compile time. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
-
-/** A bucket in the hash map. */
-typedef struct {
- /** The incremental ID used for indexing back into the pool. */
- unsigned int id: 30;
-
- /** The type of the bucket, which determines how to free it. */
- pm_constant_pool_bucket_type_t type: 2;
-
- /** The hash of the bucket. */
- uint32_t hash;
-} pm_constant_pool_bucket_t;
-
-/** A constant in the pool which effectively stores a string. */
-typedef struct {
- /** A pointer to the start of the string. */
- const uint8_t *start;
-
- /** The length of the string. */
- size_t length;
-} pm_constant_t;
-
-/** The overall constant pool, which stores constants found while parsing. */
-typedef struct {
- /** The buckets in the hash map. */
- pm_constant_pool_bucket_t *buckets;
-
- /** The constants that are stored in the buckets. */
- pm_constant_t *constants;
-
- /** The number of buckets in the hash map. */
- uint32_t size;
-
- /** The number of buckets that have been allocated in the hash map. */
- uint32_t capacity;
-} pm_constant_pool_t;
-
-/**
- * Initialize a new constant pool with a given capacity.
- *
- * @param pool The pool to initialize.
- * @param capacity The initial capacity of the pool.
- * @return Whether the initialization succeeded.
- */
-bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
-
-/**
- * Return a pointer to the constant indicated by the given constant id.
- *
- * @param pool The pool to get the constant from.
- * @param constant_id The id of the constant to get.
- * @return A pointer to the constant.
- */
-pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
-
-/**
- * Find a constant in a constant pool. Returns the id of the constant, or 0 if
- * the constant is not found.
- *
- * @param pool The pool to find the constant in.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool that is a slice of a source string.
- * Returns the id of the constant, or 0 if any potential calls to resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool from memory that is now owned by the
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
- * resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool from memory that is constant. Returns
- * the id of the constant, or 0 if any potential calls to resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Free the memory associated with a constant pool.
- *
- * @param pool The pool to free.
- */
-void pm_constant_pool_free(pm_constant_pool_t *pool);
-
-#endif
diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c
deleted file mode 100644
index 4170ecc58d..0000000000
--- a/prism/util/pm_integer.c
+++ /dev/null
@@ -1,670 +0,0 @@
-#include "prism/util/pm_integer.h"
-
-/**
- * Pull out the length and values from the integer, regardless of the form in
- * which the length/values are stored.
- */
-#define INTEGER_EXTRACT(integer, length_variable, values_variable) \
- if ((integer)->values == NULL) { \
- length_variable = 1; \
- values_variable = &(integer)->value; \
- } else { \
- length_variable = (integer)->length; \
- values_variable = (integer)->values; \
- }
-
-/**
- * Adds two positive pm_integer_t with the given base.
- * Return pm_integer_t with values allocated. Not normalized.
- */
-static void
-big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
- size_t left_length;
- uint32_t *left_values;
- INTEGER_EXTRACT(left, left_length, left_values)
-
- size_t right_length;
- uint32_t *right_values;
- INTEGER_EXTRACT(right, right_length, right_values)
-
- size_t length = left_length < right_length ? right_length : left_length;
- uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * (length + 1));
- if (values == NULL) return;
-
- uint64_t carry = 0;
- for (size_t index = 0; index < length; index++) {
- uint64_t sum = carry + (index < left_length ? left_values[index] : 0) + (index < right_length ? right_values[index] : 0);
- values[index] = (uint32_t) (sum % base);
- carry = sum / base;
- }
-
- if (carry > 0) {
- values[length] = (uint32_t) carry;
- length++;
- }
-
- *destination = (pm_integer_t) { length, values, 0, false };
-}
-
-/**
- * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given
- * base. Assume a, b, c, a - b - c all to be positive.
- * Return pm_integer_t with values allocated. Not normalized.
- */
-static void
-big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) {
- size_t a_length;
- uint32_t *a_values;
- INTEGER_EXTRACT(a, a_length, a_values)
-
- size_t b_length;
- uint32_t *b_values;
- INTEGER_EXTRACT(b, b_length, b_values)
-
- size_t c_length;
- uint32_t *c_values;
- INTEGER_EXTRACT(c, c_length, c_values)
-
- uint32_t *values = (uint32_t*) xmalloc(sizeof(uint32_t) * a_length);
- int64_t carry = 0;
-
- for (size_t index = 0; index < a_length; index++) {
- int64_t sub = (
- carry +
- a_values[index] -
- (index < b_length ? b_values[index] : 0) -
- (index < c_length ? c_values[index] : 0)
- );
-
- if (sub >= 0) {
- values[index] = (uint32_t) sub;
- carry = 0;
- } else {
- sub += 2 * (int64_t) base;
- values[index] = (uint32_t) ((uint64_t) sub % base);
- carry = sub / (int64_t) base - 2;
- }
- }
-
- while (a_length > 1 && values[a_length - 1] == 0) a_length--;
- *destination = (pm_integer_t) { a_length, values, 0, false };
-}
-
-/**
- * Multiply two positive integers with the given base using karatsuba algorithm.
- * Return pm_integer_t with values allocated. Not normalized.
- */
-static void
-karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) {
- size_t left_length;
- uint32_t *left_values;
- INTEGER_EXTRACT(left, left_length, left_values)
-
- size_t right_length;
- uint32_t *right_values;
- INTEGER_EXTRACT(right, right_length, right_values)
-
- if (left_length > right_length) {
- size_t temporary_length = left_length;
- left_length = right_length;
- right_length = temporary_length;
-
- uint32_t *temporary_values = left_values;
- left_values = right_values;
- right_values = temporary_values;
- }
-
- if (left_length <= 10) {
- size_t length = left_length + right_length;
- uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
- if (values == NULL) return;
-
- for (size_t left_index = 0; left_index < left_length; left_index++) {
- uint32_t carry = 0;
- for (size_t right_index = 0; right_index < right_length; right_index++) {
- uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry;
- values[left_index + right_index] = (uint32_t) (product % base);
- carry = (uint32_t) (product / base);
- }
- values[left_index + right_length] = carry;
- }
-
- while (length > 1 && values[length - 1] == 0) length--;
- *destination = (pm_integer_t) { length, values, 0, false };
- return;
- }
-
- if (left_length * 2 <= right_length) {
- uint32_t *values = (uint32_t *) xcalloc(left_length + right_length, sizeof(uint32_t));
-
- for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) {
- size_t end_offset = start_offset + left_length;
- if (end_offset > right_length) end_offset = right_length;
-
- pm_integer_t sliced_left = {
- .length = left_length,
- .values = left_values,
- .value = 0,
- .negative = false
- };
-
- pm_integer_t sliced_right = {
- .length = end_offset - start_offset,
- .values = right_values + start_offset,
- .value = 0,
- .negative = false
- };
-
- pm_integer_t product;
- karatsuba_multiply(&product, &sliced_left, &sliced_right, base);
-
- uint32_t carry = 0;
- for (size_t index = 0; index < product.length; index++) {
- uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry;
- values[start_offset + index] = (uint32_t) (sum % base);
- carry = (uint32_t) (sum / base);
- }
-
- if (carry > 0) values[start_offset + product.length] += carry;
- pm_integer_free(&product);
- }
-
- *destination = (pm_integer_t) { left_length + right_length, values, 0, false };
- return;
- }
-
- size_t half = left_length / 2;
- pm_integer_t x0 = { half, left_values, 0, false };
- pm_integer_t x1 = { left_length - half, left_values + half, 0, false };
- pm_integer_t y0 = { half, right_values, 0, false };
- pm_integer_t y1 = { right_length - half, right_values + half, 0, false };
-
- pm_integer_t z0 = { 0 };
- karatsuba_multiply(&z0, &x0, &y0, base);
-
- pm_integer_t z2 = { 0 };
- karatsuba_multiply(&z2, &x1, &y1, base);
-
- // For simplicity to avoid considering negative values,
- // use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm.
- pm_integer_t x01 = { 0 };
- big_add(&x01, &x0, &x1, base);
-
- pm_integer_t y01 = { 0 };
- big_add(&y01, &y0, &y1, base);
-
- pm_integer_t xy = { 0 };
- karatsuba_multiply(&xy, &x01, &y01, base);
-
- pm_integer_t z1;
- big_sub2(&z1, &xy, &z0, &z2, base);
-
- size_t length = left_length + right_length;
- uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t));
-
- assert(z0.values != NULL);
- memcpy(values, z0.values, sizeof(uint32_t) * z0.length);
-
- assert(z2.values != NULL);
- memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length);
-
- uint32_t carry = 0;
- for(size_t index = 0; index < z1.length; index++) {
- uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index];
- values[index + half] = (uint32_t) (sum % base);
- carry = (uint32_t) (sum / base);
- }
-
- for(size_t index = half + z1.length; carry > 0; index++) {
- uint64_t sum = (uint64_t) carry + values[index];
- values[index] = (uint32_t) (sum % base);
- carry = (uint32_t) (sum / base);
- }
-
- while (length > 1 && values[length - 1] == 0) length--;
- pm_integer_free(&z0);
- pm_integer_free(&z1);
- pm_integer_free(&z2);
- pm_integer_free(&x01);
- pm_integer_free(&y01);
- pm_integer_free(&xy);
-
- *destination = (pm_integer_t) { length, values, 0, false };
-}
-
-/**
- * The values of a hexadecimal digit, where the index is the ASCII character.
- * Note that there's an odd exception here where _ is mapped to 0. This is
- * because it's possible for us to end up trying to parse a number that has
- * already had an error attached to it, and we want to provide _something_ to
- * the user.
- */
-static const int8_t pm_integer_parse_digit_values[256] = {
-// 0 1 2 3 4 5 6 7 8 9 A B C D E F
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 3x
- -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, // 5x
- -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx
-};
-
-/**
- * Return the value of a hexadecimal digit in a uint8_t.
- */
-static uint8_t
-pm_integer_parse_digit(const uint8_t character) {
- int8_t value = pm_integer_parse_digit_values[character];
- assert(value != -1 && "invalid digit");
-
- return (uint8_t) value;
-}
-
-/**
- * Create a pm_integer_t from uint64_t with the given base. It is assumed that
- * the memory for the pm_integer_t pointer has been zeroed.
- */
-static void
-pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) {
- if (value < base) {
- integer->value = (uint32_t) value;
- return;
- }
-
- size_t length = 0;
- uint64_t length_value = value;
- while (length_value > 0) {
- length++;
- length_value /= base;
- }
-
- uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length);
- if (values == NULL) return;
-
- for (size_t value_index = 0; value_index < length; value_index++) {
- values[value_index] = (uint32_t) (value % base);
- value /= base;
- }
-
- integer->length = length;
- integer->values = values;
-}
-
-/**
- * Normalize pm_integer_t.
- * Heading zero values will be removed. If the integer fits into uint32_t,
- * values is set to NULL, length is set to 0, and value field will be used.
- */
-static void
-pm_integer_normalize(pm_integer_t *integer) {
- if (integer->values == NULL) {
- return;
- }
-
- while (integer->length > 1 && integer->values[integer->length - 1] == 0) {
- integer->length--;
- }
-
- if (integer->length > 1) {
- return;
- }
-
- uint32_t value = integer->values[0];
- bool negative = integer->negative && value != 0;
-
- pm_integer_free(integer);
- *integer = (pm_integer_t) { .values = NULL, .value = value, .length = 0, .negative = negative };
-}
-
-/**
- * Convert base of the integer.
- * In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9.
- */
-static void
-pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) {
- size_t source_length;
- const uint32_t *source_values;
- INTEGER_EXTRACT(source, source_length, source_values)
-
- size_t bigints_length = (source_length + 1) / 2;
- assert(bigints_length > 0);
-
- pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t));
- if (bigints == NULL) return;
-
- for (size_t index = 0; index < source_length; index += 2) {
- uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0);
- pm_integer_from_uint64(&bigints[index / 2], value, base_to);
- }
-
- pm_integer_t base = { 0 };
- pm_integer_from_uint64(&base, base_from, base_to);
-
- while (bigints_length > 1) {
- pm_integer_t next_base;
- karatsuba_multiply(&next_base, &base, &base, base_to);
-
- pm_integer_free(&base);
- base = next_base;
-
- size_t next_length = (bigints_length + 1) / 2;
- pm_integer_t *next_bigints = (pm_integer_t *) xcalloc(next_length, sizeof(pm_integer_t));
-
- for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) {
- if (bigints_index + 1 == bigints_length) {
- next_bigints[bigints_index / 2] = bigints[bigints_index];
- } else {
- pm_integer_t multiplied = { 0 };
- karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to);
-
- big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to);
- pm_integer_free(&bigints[bigints_index]);
- pm_integer_free(&bigints[bigints_index + 1]);
- pm_integer_free(&multiplied);
- }
- }
-
- xfree(bigints);
- bigints = next_bigints;
- bigints_length = next_length;
- }
-
- *destination = bigints[0];
- destination->negative = source->negative;
- pm_integer_normalize(destination);
-
- xfree(bigints);
- pm_integer_free(&base);
-}
-
-#undef INTEGER_EXTRACT
-
-/**
- * Convert digits to integer with the given power-of-two base.
- */
-static void
-pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) {
- size_t bit = 1;
- while (base > (uint32_t) (1 << bit)) bit++;
-
- size_t length = (digits_length * bit + 31) / 32;
- uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
-
- for (size_t digit_index = 0; digit_index < digits_length; digit_index++) {
- size_t bit_position = bit * (digits_length - digit_index - 1);
- uint32_t value = digits[digit_index];
-
- size_t index = bit_position / 32;
- size_t shift = bit_position % 32;
-
- values[index] |= value << shift;
- if (32 - shift < bit) values[index + 1] |= value >> (32 - shift);
- }
-
- while (length > 1 && values[length - 1] == 0) length--;
- *integer = (pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false };
- pm_integer_normalize(integer);
-}
-
-/**
- * Convert decimal digits to pm_integer_t.
- */
-static void
-pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
- const size_t batch = 9;
- size_t length = (digits_length + batch - 1) / batch;
-
- uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
- uint32_t value = 0;
-
- for (size_t digits_index = 0; digits_index < digits_length; digits_index++) {
- value = value * 10 + digits[digits_index];
-
- size_t reverse_index = digits_length - digits_index - 1;
- if (reverse_index % batch == 0) {
- values[reverse_index / batch] = value;
- value = 0;
- }
- }
-
- // Convert base from 10**9 to 1<<32.
- pm_integer_convert_base(integer, &((pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
- xfree(values);
-}
-
-/**
- * Parse a large integer from a string that does not fit into uint32_t.
- */
-static void
-pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
- // Allocate an array to store digits.
- uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
- size_t digits_length = 0;
-
- for (; start < end; start++) {
- if (*start == '_') continue;
- digits[digits_length++] = pm_integer_parse_digit(*start);
- }
-
- // Construct pm_integer_t from the digits.
- if (multiplier == 10) {
- pm_integer_parse_decimal(integer, digits, digits_length);
- } else {
- pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
- }
-
- xfree(digits);
-}
-
-/**
- * Parse an integer from a string. This assumes that the format of the integer
- * has already been validated, as internal validation checks are not performed
- * here.
- */
-void
-pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) {
- // Ignore unary +. Unary - is parsed differently and will not end up here.
- // Instead, it will modify the parsed integer later.
- if (*start == '+') start++;
-
- // Determine the multiplier from the base, and skip past any prefixes.
- uint32_t multiplier = 10;
- switch (base) {
- case PM_INTEGER_BASE_DEFAULT:
- while (*start == '0') start++; // 01 -> 1
- break;
- case PM_INTEGER_BASE_BINARY:
- start += 2; // 0b
- multiplier = 2;
- break;
- case PM_INTEGER_BASE_OCTAL:
- start++; // 0
- if (*start == '_' || *start == 'o' || *start == 'O') start++; // o
- multiplier = 8;
- break;
- case PM_INTEGER_BASE_DECIMAL:
- if (*start == '0' && (end - start) > 1) start += 2; // 0d
- break;
- case PM_INTEGER_BASE_HEXADECIMAL:
- start += 2; // 0x
- multiplier = 16;
- break;
- case PM_INTEGER_BASE_UNKNOWN:
- if (*start == '0' && (end - start) > 1) {
- switch (start[1]) {
- case '_': start += 2; multiplier = 8; break;
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break;
- case 'b': case 'B': start += 2; multiplier = 2; break;
- case 'o': case 'O': start += 2; multiplier = 8; break;
- case 'd': case 'D': start += 2; break;
- case 'x': case 'X': start += 2; multiplier = 16; break;
- default: assert(false && "unreachable"); break;
- }
- }
- break;
- }
-
- // It's possible that we've consumed everything at this point if there is an
- // invalid integer. If this is the case, we'll just return 0.
- if (start >= end) return;
-
- const uint8_t *cursor = start;
- uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++);
-
- for (; cursor < end; cursor++) {
- if (*cursor == '_') continue;
- value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor);
-
- if (value > UINT32_MAX) {
- // If the integer is too large to fit into a single uint32_t, then
- // we'll parse it as a big integer.
- pm_integer_parse_big(integer, multiplier, start, end);
- return;
- }
- }
-
- integer->value = (uint32_t) value;
-}
-
-/**
- * Compare two integers. This function returns -1 if the left integer is less
- * than the right integer, 0 if they are equal, and 1 if the left integer is
- * greater than the right integer.
- */
-int
-pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
- if (left->negative != right->negative) return left->negative ? -1 : 1;
- int negative = left->negative ? -1 : 1;
-
- if (left->values == NULL && right->values == NULL) {
- if (left->value < right->value) return -1 * negative;
- if (left->value > right->value) return 1 * negative;
- return 0;
- }
-
- if (left->values == NULL || left->length < right->length) return -1 * negative;
- if (right->values == NULL || left->length > right->length) return 1 * negative;
-
- for (size_t index = 0; index < left->length; index++) {
- size_t value_index = left->length - index - 1;
- uint32_t left_value = left->values[value_index];
- uint32_t right_value = right->values[value_index];
-
- if (left_value < right_value) return -1 * negative;
- if (left_value > right_value) return 1 * negative;
- }
-
- return 0;
-}
-
-/**
- * Reduce a ratio of integers to its simplest form.
- */
-void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
- // If either the numerator or denominator do not fit into a 32-bit integer,
- // then this function is a no-op. In the future, we may consider reducing
- // even the larger numbers, but for now we're going to keep it simple.
- if (
- // If the numerator doesn't fit into a 32-bit integer, return early.
- numerator->length != 0 ||
- // If the denominator doesn't fit into a 32-bit integer, return early.
- denominator->length != 0 ||
- // If the numerator is 0, then return early.
- numerator->value == 0 ||
- // If the denominator is 1, then return early.
- denominator->value == 1
- ) return;
-
- // Find the greatest common divisor of the numerator and denominator.
- uint32_t divisor = numerator->value;
- uint32_t remainder = denominator->value;
-
- while (remainder != 0) {
- uint32_t temporary = remainder;
- remainder = divisor % remainder;
- divisor = temporary;
- }
-
- // Divide the numerator and denominator by the greatest common divisor.
- numerator->value /= divisor;
- denominator->value /= divisor;
-}
-
-/**
- * Convert an integer to a decimal string.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
- if (integer->negative) {
- pm_buffer_append_byte(buffer, '-');
- }
-
- // If the integer fits into a single uint32_t, then we can just append the
- // value directly to the buffer.
- if (integer->values == NULL) {
- pm_buffer_append_format(buffer, "%" PRIu32, integer->value);
- return;
- }
-
- // If the integer is two uint32_t values, then we can | them together and
- // append the result to the buffer.
- if (integer->length == 2) {
- const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32);
- pm_buffer_append_format(buffer, "%" PRIu64, value);
- return;
- }
-
- // Otherwise, first we'll convert the base from 1<<32 to 10**9.
- pm_integer_t converted = { 0 };
- pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000);
-
- if (converted.values == NULL) {
- pm_buffer_append_format(buffer, "%" PRIu32, converted.value);
- pm_integer_free(&converted);
- return;
- }
-
- // Allocate a buffer that we'll copy the decimal digits into.
- size_t digits_length = converted.length * 9;
- char *digits = xcalloc(digits_length, sizeof(char));
- if (digits == NULL) return;
-
- // Pack bigdecimal to digits.
- for (size_t value_index = 0; value_index < converted.length; value_index++) {
- uint32_t value = converted.values[value_index];
-
- for (size_t digit_index = 0; digit_index < 9; digit_index++) {
- digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10);
- value /= 10;
- }
- }
-
- size_t start_offset = 0;
- while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++;
-
- // Finally, append the string to the buffer and free the digits.
- pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
- xfree(digits);
- pm_integer_free(&converted);
-}
-
-/**
- * Free the internal memory of an integer. This memory will only be allocated if
- * the integer exceeds the size of a single uint32_t.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_integer_free(pm_integer_t *integer) {
- if (integer->values) {
- xfree(integer->values);
- }
-}
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
deleted file mode 100644
index a9e2966703..0000000000
--- a/prism/util/pm_integer.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * @file pm_integer.h
- *
- * This module provides functions for working with arbitrary-sized integers.
- */
-#ifndef PRISM_NUMBER_H
-#define PRISM_NUMBER_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_buffer.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-/**
- * A structure represents an arbitrary-sized integer.
- */
-typedef struct {
- /**
- * The number of allocated values. length is set to 0 if the integer fits
- * into uint32_t.
- */
- size_t length;
-
- /**
- * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
- */
- uint32_t *values;
-
- /**
- * Embedded value for small integer. This value is set to 0 if the value
- * does not fit into uint32_t.
- */
- uint32_t value;
-
- /**
- * Whether or not the integer is negative. It is stored this way so that a
- * zeroed pm_integer_t is always positive zero.
- */
- bool negative;
-} pm_integer_t;
-
-/**
- * An enum controlling the base of an integer. It is expected that the base is
- * already known before parsing the integer, even though it could be derived
- * from the string itself.
- */
-typedef enum {
- /** The default decimal base, with no prefix. Leading 0s will be ignored. */
- PM_INTEGER_BASE_DEFAULT,
-
- /** The binary base, indicated by a 0b or 0B prefix. */
- PM_INTEGER_BASE_BINARY,
-
- /** The octal base, indicated by a 0, 0o, or 0O prefix. */
- PM_INTEGER_BASE_OCTAL,
-
- /** The decimal base, indicated by a 0d, 0D, or empty prefix. */
- PM_INTEGER_BASE_DECIMAL,
-
- /** The hexadecimal base, indicated by a 0x or 0X prefix. */
- PM_INTEGER_BASE_HEXADECIMAL,
-
- /**
- * An unknown base, in which case pm_integer_parse will derive it based on
- * the content of the string. This is less efficient and does more
- * comparisons, so if callers know the base ahead of time, they should use
- * that instead.
- */
- PM_INTEGER_BASE_UNKNOWN
-} pm_integer_base_t;
-
-/**
- * Parse an integer from a string. This assumes that the format of the integer
- * has already been validated, as internal validation checks are not performed
- * here.
- *
- * @param integer The integer to parse into.
- * @param base The base of the integer.
- * @param start The start of the string.
- * @param end The end of the string.
- */
-void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
-
-/**
- * Compare two integers. This function returns -1 if the left integer is less
- * than the right integer, 0 if they are equal, and 1 if the left integer is
- * greater than the right integer.
- *
- * @param left The left integer to compare.
- * @param right The right integer to compare.
- * @return The result of the comparison.
- */
-int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
-
-/**
- * Reduce a ratio of integers to its simplest form.
- *
- * If either the numerator or denominator do not fit into a 32-bit integer, then
- * this function is a no-op. In the future, we may consider reducing even the
- * larger numbers, but for now we're going to keep it simple.
- *
- * @param numerator The numerator of the ratio.
- * @param denominator The denominator of the ratio.
- */
-void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
-
-/**
- * Convert an integer to a decimal string.
- *
- * @param buffer The buffer to append the string to.
- * @param integer The integer to convert to a string.
- */
-PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
-
-/**
- * Free the internal memory of an integer. This memory will only be allocated if
- * the integer exceeds the size of a single node in the linked list.
- *
- * @param integer The integer to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer);
-
-#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
deleted file mode 100644
index ad2294cd60..0000000000
--- a/prism/util/pm_list.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "prism/util/pm_list.h"
-
-/**
- * Returns true if the given list is empty.
- */
-PRISM_EXPORTED_FUNCTION bool
-pm_list_empty_p(pm_list_t *list) {
- return list->head == NULL;
-}
-
-/**
- * Returns the size of the list.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_list_size(pm_list_t *list) {
- return list->size;
-}
-
-/**
- * Append a node to the given list.
- */
-void
-pm_list_append(pm_list_t *list, pm_list_node_t *node) {
- if (list->head == NULL) {
- list->head = node;
- } else {
- list->tail->next = node;
- }
-
- list->tail = node;
- list->size++;
-}
-
-/**
- * Deallocate the internal state of the given list.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_list_free(pm_list_t *list) {
- pm_list_node_t *node = list->head;
- pm_list_node_t *next;
-
- while (node != NULL) {
- next = node->next;
- xfree(node);
- node = next;
- }
-
- list->size = 0;
-}
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
deleted file mode 100644
index 3512dee979..0000000000
--- a/prism/util/pm_list.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * @file pm_list.h
- *
- * An abstract linked list.
- */
-#ifndef PRISM_LIST_H
-#define PRISM_LIST_H
-
-#include "prism/defines.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-/**
- * This struct represents an abstract linked list that provides common
- * functionality. It is meant to be used any time a linked list is necessary to
- * store data.
- *
- * The linked list itself operates off a set of pointers. Because the pointers
- * are not necessarily sequential, they can be of any size. We use this fact to
- * allow the consumer of this linked list to extend the node struct to include
- * any data they want. This is done by using the pm_list_node_t as the first
- * member of the struct.
- *
- * For example, if we want to store a list of integers, we can do the following:
- *
- * ```c
- * typedef struct {
- * pm_list_node_t node;
- * int value;
- * } pm_int_node_t;
- *
- * pm_list_t list = { 0 };
- * pm_int_node_t *node = xmalloc(sizeof(pm_int_node_t));
- * node->value = 5;
- *
- * pm_list_append(&list, &node->node);
- * ```
- *
- * The pm_list_t struct is used to represent the overall linked list. It
- * contains a pointer to the head and tail of the list. This allows for easy
- * iteration and appending of new nodes.
- */
-typedef struct pm_list_node {
- /** A pointer to the next node in the list. */
- struct pm_list_node *next;
-} pm_list_node_t;
-
-/**
- * This represents the overall linked list. It keeps a pointer to the head and
- * tail so that iteration is easy and pushing new nodes is easy.
- */
-typedef struct {
- /** The size of the list. */
- size_t size;
-
- /** A pointer to the head of the list. */
- pm_list_node_t *head;
-
- /** A pointer to the tail of the list. */
- pm_list_node_t *tail;
-} pm_list_t;
-
-/**
- * Returns true if the given list is empty.
- *
- * @param list The list to check.
- * @return True if the given list is empty, otherwise false.
- */
-PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
-
-/**
- * Returns the size of the list.
- *
- * @param list The list to check.
- * @return The size of the list.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
-
-/**
- * Append a node to the given list.
- *
- * @param list The list to append to.
- * @param node The node to append.
- */
-void pm_list_append(pm_list_t *list, pm_list_node_t *node);
-
-/**
- * Deallocate the internal state of the given list.
- *
- * @param list The list to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
-
-#endif
diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c
deleted file mode 100644
index 7ea20ace6d..0000000000
--- a/prism/util/pm_memchr.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "prism/util/pm_memchr.h"
-
-#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
-
-/**
- * We need to roll our own memchr to handle cases where the encoding changes and
- * we need to search for a character in a buffer that could be the trailing byte
- * of a multibyte character.
- */
-void *
-pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
- if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
- const uint8_t *source = (const uint8_t *) memory;
- size_t index = 0;
-
- while (index < number) {
- if (source[index] == character) {
- return (void *) (source + index);
- }
-
- size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
- if (width == 0) {
- return NULL;
- }
-
- index += width;
- }
-
- return NULL;
- } else {
- return memchr(memory, character, number);
- }
-}
-
-#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
deleted file mode 100644
index e0671eaed3..0000000000
--- a/prism/util/pm_memchr.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * @file pm_memchr.h
- *
- * A custom memchr implementation.
- */
-#ifndef PRISM_MEMCHR_H
-#define PRISM_MEMCHR_H
-
-#include "prism/defines.h"
-#include "prism/encoding.h"
-
-#include <stddef.h>
-
-/**
- * We need to roll our own memchr to handle cases where the encoding changes and
- * we need to search for a character in a buffer that could be the trailing byte
- * of a multibyte character.
- *
- * @param source The source string.
- * @param character The character to search for.
- * @param number The maximum number of bytes to search.
- * @param encoding_changed Whether the encoding changed.
- * @param encoding A pointer to the encoding.
- * @return A pointer to the first occurrence of the character in the source
- * string, or NULL if no such character exists.
- */
-void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
-
-#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
deleted file mode 100644
index 8331618f54..0000000000
--- a/prism/util/pm_newline_list.c
+++ /dev/null
@@ -1,125 +0,0 @@
-#include "prism/util/pm_newline_list.h"
-
-/**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
- */
-bool
-pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
- list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
- if (list->offsets == NULL) return false;
-
- list->start = start;
-
- // This is 1 instead of 0 because we want to include the first line of the
- // file as having offset 0, which is set because of calloc.
- list->size = 1;
- list->capacity = capacity;
-
- return true;
-}
-
-/**
- * Clear out the newlines that have been appended to the list.
- */
-void
-pm_newline_list_clear(pm_newline_list_t *list) {
- list->size = 1;
-}
-
-/**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
- */
-bool
-pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
- if (list->size == list->capacity) {
- size_t *original_offsets = list->offsets;
-
- list->capacity = (list->capacity * 3) / 2;
- list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
- if (list->offsets == NULL) return false;
-
- memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
- xfree(original_offsets);
- }
-
- assert(*cursor == '\n');
- assert(cursor >= list->start);
- size_t newline_offset = (size_t) (cursor - list->start + 1);
-
- assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
- list->offsets[list->size++] = newline_offset;
-
- return true;
-}
-
-/**
- * Returns the line of the given offset. If the offset is not in the list, the
- * line of the closest offset less than the given offset is returned.
- */
-int32_t
-pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
- assert(cursor >= list->start);
- size_t offset = (size_t) (cursor - list->start);
-
- size_t left = 0;
- size_t right = list->size - 1;
-
- while (left <= right) {
- size_t mid = left + (right - left) / 2;
-
- if (list->offsets[mid] == offset) {
- return ((int32_t) mid) + start_line;
- }
-
- if (list->offsets[mid] < offset) {
- left = mid + 1;
- } else {
- right = mid - 1;
- }
- }
-
- return ((int32_t) left) + start_line - 1;
-}
-
-/**
- * Returns the line and column of the given offset. If the offset is not in the
- * list, the line and column of the closest offset less than the given offset
- * are returned.
- */
-pm_line_column_t
-pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
- assert(cursor >= list->start);
- size_t offset = (size_t) (cursor - list->start);
-
- size_t left = 0;
- size_t right = list->size - 1;
-
- while (left <= right) {
- size_t mid = left + (right - left) / 2;
-
- if (list->offsets[mid] == offset) {
- return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
- }
-
- if (list->offsets[mid] < offset) {
- left = mid + 1;
- } else {
- right = mid - 1;
- }
- }
-
- return ((pm_line_column_t) {
- .line = ((int32_t) left) + start_line - 1,
- .column = (uint32_t) (offset - list->offsets[left - 1])
- });
-}
-
-/**
- * Free the internal memory allocated for the newline list.
- */
-void
-pm_newline_list_free(pm_newline_list_t *list) {
- xfree(list->offsets);
-}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
deleted file mode 100644
index 406abe8ba5..0000000000
--- a/prism/util/pm_newline_list.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * @file pm_newline_list.h
- *
- * A list of byte offsets of newlines in a string.
- *
- * When compiling the syntax tree, it's necessary to know the line and column
- * of many nodes. This is necessary to support things like error messages,
- * tracepoints, etc.
- *
- * It's possible that we could store the start line, start column, end line, and
- * end column on every node in addition to the offsets that we already store,
- * but that would be quite a lot of memory overhead.
- */
-#ifndef PRISM_NEWLINE_LIST_H
-#define PRISM_NEWLINE_LIST_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-
-/**
- * A list of offsets of newlines in a string. The offsets are assumed to be
- * sorted/inserted in ascending order.
- */
-typedef struct {
- /** A pointer to the start of the source string. */
- const uint8_t *start;
-
- /** The number of offsets in the list. */
- size_t size;
-
- /** The capacity of the list that has been allocated. */
- size_t capacity;
-
- /** The list of offsets. */
- size_t *offsets;
-} pm_newline_list_t;
-
-/**
- * A line and column in a string.
- */
-typedef struct {
- /** The line number. */
- int32_t line;
-
- /** The column number. */
- uint32_t column;
-} pm_line_column_t;
-
-/**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
- *
- * @param list The list to initialize.
- * @param start A pointer to the start of the source string.
- * @param capacity The initial capacity of the list.
- * @return True if the allocation of the offsets succeeds, otherwise false.
- */
-bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
-
-/**
- * Clear out the newlines that have been appended to the list.
- *
- * @param list The list to clear.
- */
-void
-pm_newline_list_clear(pm_newline_list_t *list);
-
-/**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
- *
- * @param list The list to append to.
- * @param cursor A pointer to the offset to append.
- * @return True if the reallocation of the offsets succeeds (if one was
- * necessary), otherwise false.
- */
-bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
-
-/**
- * Returns the line of the given offset. If the offset is not in the list, the
- * line of the closest offset less than the given offset is returned.
- *
- * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
- * @param start_line The line to start counting from.
- * @return The line of the given offset.
- */
-int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
-
-/**
- * Returns the line and column of the given offset. If the offset is not in the
- * list, the line and column of the closest offset less than the given offset
- * are returned.
- *
- * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
- * @param start_line The line to start counting from.
- * @return The line and column of the given offset.
- */
-pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
-
-/**
- * Free the internal memory allocated for the newline list.
- *
- * @param list The list to free.
- */
-void pm_newline_list_free(pm_newline_list_t *list);
-
-#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
deleted file mode 100644
index 75422fbdf2..0000000000
--- a/prism/util/pm_string.c
+++ /dev/null
@@ -1,383 +0,0 @@
-#include "prism/util/pm_string.h"
-
-/**
- * Returns the size of the pm_string_t struct. This is necessary to allocate the
- * correct amount of memory in the FFI backend.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_sizeof(void) {
- return sizeof(pm_string_t);
-}
-
-/**
- * Initialize a shared string that is based on initial input.
- */
-void
-pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
- assert(start <= end);
-
- *string = (pm_string_t) {
- .type = PM_STRING_SHARED,
- .source = start,
- .length = (size_t) (end - start)
- };
-}
-
-/**
- * Initialize an owned string that is responsible for freeing allocated memory.
- */
-void
-pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
- *string = (pm_string_t) {
- .type = PM_STRING_OWNED,
- .source = source,
- .length = length
- };
-}
-
-/**
- * Initialize a constant string that doesn't own its memory source.
- */
-void
-pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
- *string = (pm_string_t) {
- .type = PM_STRING_CONSTANT,
- .source = (const uint8_t *) source,
- .length = length
- };
-}
-
-#ifdef _WIN32
-/**
- * Represents a file handle on Windows, where the path will need to be freed
- * when the file is closed.
- */
-typedef struct {
- /** The path to the file, which will become allocated memory. */
- WCHAR *path;
-
- /** The handle to the file, which will start as uninitialized memory. */
- HANDLE file;
-} pm_string_file_handle_t;
-
-/**
- * Open the file indicated by the filepath parameter for reading on Windows.
- * Perform any kind of normalization that needs to happen on the filepath.
- */
-static pm_string_init_result_t
-pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
- int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
- if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
-
- handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
- if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
- xfree(handle->path);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
- if (handle->file == INVALID_HANDLE_VALUE) {
- pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
-
- if (GetLastError() == ERROR_ACCESS_DENIED) {
- DWORD attributes = GetFileAttributesW(handle->path);
- if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
- result = PM_STRING_INIT_ERROR_DIRECTORY;
- }
- }
-
- xfree(handle->path);
- return result;
- }
-
- return PM_STRING_INIT_SUCCESS;
-}
-
-/**
- * Close the file handle and free the path.
- */
-static void
-pm_string_file_handle_close(pm_string_file_handle_t *handle) {
- xfree(handle->path);
- CloseHandle(handle->file);
-}
-#endif
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * We want to use demand paging as much as possible in order to avoid having to
- * read the entire file into memory (which could be detrimental to performance
- * for large files). This means that if we're on windows we'll use
- * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
- * `mmap`, and on other POSIX systems we'll use `read`.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t
-pm_string_mapped_init(pm_string_t *string, const char *filepath) {
-#ifdef _WIN32
- // Open the file for reading.
- pm_string_file_handle_t handle;
- pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
- if (result != PM_STRING_INIT_SUCCESS) return result;
-
- // Get the file size.
- DWORD file_size = GetFileSize(handle.file, NULL);
- if (file_size == INVALID_FILE_SIZE) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // If the file is empty, then we don't need to do anything else, we'll set
- // the source to a constant empty string and return.
- if (file_size == 0) {
- pm_string_file_handle_close(&handle);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- // Create a mapping of the file.
- HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
- if (mapping == NULL) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Map the file into memory.
- uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
- CloseHandle(mapping);
- pm_string_file_handle_close(&handle);
-
- if (source == NULL) {
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
- return PM_STRING_INIT_SUCCESS;
-#elif defined(_POSIX_MAPPED_FILES)
- // Open the file for reading
- int fd = open(filepath, O_RDONLY);
- if (fd == -1) {
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Stat the file to get the file size
- struct stat sb;
- if (fstat(fd, &sb) == -1) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Ensure it is a file and not a directory
- if (S_ISDIR(sb.st_mode)) {
- close(fd);
- return PM_STRING_INIT_ERROR_DIRECTORY;
- }
-
- // mmap the file descriptor to virtually get the contents
- size_t size = (size_t) sb.st_size;
- uint8_t *source = NULL;
-
- if (size == 0) {
- close(fd);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
- if (source == MAP_FAILED) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- close(fd);
- *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
- return PM_STRING_INIT_SUCCESS;
-#else
- return pm_string_file_init(string, filepath);
-#endif
-}
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t
-pm_string_file_init(pm_string_t *string, const char *filepath) {
-#ifdef _WIN32
- // Open the file for reading.
- pm_string_file_handle_t handle;
- pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
- if (result != PM_STRING_INIT_SUCCESS) return result;
-
- // Get the file size.
- DWORD file_size = GetFileSize(handle.file, NULL);
- if (file_size == INVALID_FILE_SIZE) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // If the file is empty, then we don't need to do anything else, we'll set
- // the source to a constant empty string and return.
- if (file_size == 0) {
- pm_string_file_handle_close(&handle);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- // Create a buffer to read the file into.
- uint8_t *source = xmalloc(file_size);
- if (source == NULL) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Read the contents of the file
- DWORD bytes_read;
- if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Check the number of bytes read
- if (bytes_read != file_size) {
- xfree(source);
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- pm_string_file_handle_close(&handle);
- *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
- return PM_STRING_INIT_SUCCESS;
-#elif defined(PRISM_HAS_FILESYSTEM)
- // Open the file for reading
- int fd = open(filepath, O_RDONLY);
- if (fd == -1) {
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Stat the file to get the file size
- struct stat sb;
- if (fstat(fd, &sb) == -1) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Ensure it is a file and not a directory
- if (S_ISDIR(sb.st_mode)) {
- close(fd);
- return PM_STRING_INIT_ERROR_DIRECTORY;
- }
-
- // Check the size to see if it's empty
- size_t size = (size_t) sb.st_size;
- if (size == 0) {
- close(fd);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- size_t length = (size_t) size;
- uint8_t *source = xmalloc(length);
- if (source == NULL) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- long bytes_read = (long) read(fd, source, length);
- close(fd);
-
- if (bytes_read == -1) {
- xfree(source);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
- return PM_STRING_INIT_SUCCESS;
-#else
- (void) string;
- (void) filepath;
- perror("pm_string_file_init is not implemented for this platform");
- return PM_STRING_INIT_ERROR_GENERIC;
-#endif
-}
-
-/**
- * Ensure the string is owned. If it is not, then reinitialize it as owned and
- * copy over the previous source.
- */
-void
-pm_string_ensure_owned(pm_string_t *string) {
- if (string->type == PM_STRING_OWNED) return;
-
- size_t length = pm_string_length(string);
- const uint8_t *source = pm_string_source(string);
-
- uint8_t *memory = xmalloc(length);
- if (!memory) return;
-
- pm_string_owned_init(string, memory, length);
- memcpy((void *) string->source, source, length);
-}
-
-/**
- * Compare the underlying lengths and bytes of two strings. Returns 0 if the
- * strings are equal, a negative number if the left string is less than the
- * right string, and a positive number if the left string is greater than the
- * right string.
- */
-int
-pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
- size_t left_length = pm_string_length(left);
- size_t right_length = pm_string_length(right);
-
- if (left_length < right_length) {
- return -1;
- } else if (left_length > right_length) {
- return 1;
- }
-
- return memcmp(pm_string_source(left), pm_string_source(right), left_length);
-}
-
-/**
- * Returns the length associated with the string.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_length(const pm_string_t *string) {
- return string->length;
-}
-
-/**
- * Returns the start pointer associated with the string.
- */
-PRISM_EXPORTED_FUNCTION const uint8_t *
-pm_string_source(const pm_string_t *string) {
- return string->source;
-}
-
-/**
- * Free the associated memory of the given string.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_string_free(pm_string_t *string) {
- void *memory = (void *) string->source;
-
- if (string->type == PM_STRING_OWNED) {
- xfree(memory);
-#ifdef PRISM_HAS_MMAP
- } else if (string->type == PM_STRING_MAPPED && string->length) {
-#if defined(_WIN32)
- UnmapViewOfFile(memory);
-#elif defined(_POSIX_MAPPED_FILES)
- munmap(memory, string->length);
-#endif
-#endif /* PRISM_HAS_MMAP */
- }
-}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
deleted file mode 100644
index f99f1abdf3..0000000000
--- a/prism/util/pm_string.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/**
- * @file pm_string.h
- *
- * A generic string type that can have various ownership semantics.
- */
-#ifndef PRISM_STRING_H
-#define PRISM_STRING_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-
-// The following headers are necessary to read files using demand paging.
-#ifdef _WIN32
-#include <windows.h>
-#elif defined(_POSIX_MAPPED_FILES)
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#elif defined(PRISM_HAS_FILESYSTEM)
-#include <fcntl.h>
-#include <sys/stat.h>
-#endif
-
-/**
- * A generic string type that can have various ownership semantics.
- */
-typedef struct {
- /** A pointer to the start of the string. */
- const uint8_t *source;
-
- /** The length of the string in bytes of memory. */
- size_t length;
-
- /** The type of the string. This field determines how the string should be freed. */
- enum {
- /** This string is a constant string, and should not be freed. */
- PM_STRING_CONSTANT,
-
- /** This is a slice of another string, and should not be freed. */
- PM_STRING_SHARED,
-
- /** This string owns its memory, and should be freed using `pm_string_free`. */
- PM_STRING_OWNED,
-
-#ifdef PRISM_HAS_MMAP
- /** This string is a memory-mapped file, and should be freed using `pm_string_free`. */
- PM_STRING_MAPPED
-#endif
- } type;
-} pm_string_t;
-
-/**
- * Returns the size of the pm_string_t struct. This is necessary to allocate the
- * correct amount of memory in the FFI backend.
- *
- * @return The size of the pm_string_t struct.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
-
-/**
- * Defines an empty string. This is useful for initializing a string that will
- * be filled in later.
- */
-#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
-
-/**
- * Initialize a shared string that is based on initial input.
- *
- * @param string The string to initialize.
- * @param start The start of the string.
- * @param end The end of the string.
- */
-void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
-
-/**
- * Initialize an owned string that is responsible for freeing allocated memory.
- *
- * @param string The string to initialize.
- * @param source The source of the string.
- * @param length The length of the string.
- */
-void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
-
-/**
- * Initialize a constant string that doesn't own its memory source.
- *
- * @param string The string to initialize.
- * @param source The source of the string.
- * @param length The length of the string.
- */
-void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
-
-/**
- * Represents the result of calling pm_string_mapped_init or
- * pm_string_file_init. We need this additional information because there is
- * not a platform-agnostic way to indicate that the file that was attempted to
- * be opened was a directory.
- */
-typedef enum {
- /** Indicates that the string was successfully initialized. */
- PM_STRING_INIT_SUCCESS = 0,
- /**
- * Indicates a generic error from a string_*_init function, where the type
- * of error should be read from `errno` or `GetLastError()`.
- */
- PM_STRING_INIT_ERROR_GENERIC = 1,
- /**
- * Indicates that the file that was attempted to be opened was a directory.
- */
- PM_STRING_INIT_ERROR_DIRECTORY = 2
-} pm_string_init_result_t;
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * We want to use demand paging as much as possible in order to avoid having to
- * read the entire file into memory (which could be detrimental to performance
- * for large files). This means that if we're on windows we'll use
- * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
- * `mmap`, and on other POSIX systems we'll use `read`.
- *
- * @param string The string to initialize.
- * @param filepath The filepath to read.
- * @return The success of the read, indicated by the value of the enum.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * @param string The string to initialize.
- * @param filepath The filepath to read.
- * @return The success of the read, indicated by the value of the enum.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
-
-/**
- * Ensure the string is owned. If it is not, then reinitialize it as owned and
- * copy over the previous source.
- *
- * @param string The string to ensure is owned.
- */
-void pm_string_ensure_owned(pm_string_t *string);
-
-/**
- * Compare the underlying lengths and bytes of two strings. Returns 0 if the
- * strings are equal, a negative number if the left string is less than the
- * right string, and a positive number if the left string is greater than the
- * right string.
- *
- * @param left The left string to compare.
- * @param right The right string to compare.
- * @return The comparison result.
- */
-int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
-
-/**
- * Returns the length associated with the string.
- *
- * @param string The string to get the length of.
- * @return The length of the string.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
-
-/**
- * Returns the start pointer associated with the string.
- *
- * @param string The string to get the start pointer of.
- * @return The start pointer of the string.
- */
-PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
-
-/**
- * Free the associated memory of the given string.
- *
- * @param string The string to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
-
-#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c
deleted file mode 100644
index 3f58421554..0000000000
--- a/prism/util/pm_strncasecmp.c
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "prism/util/pm_strncasecmp.h"
-
-/**
- * A locale-insensitive version of `tolower(3)`
- */
-static inline int
-pm_tolower(int c)
-{
- if ('A' <= c && c <= 'Z') {
- return c | 0x20;
- }
- return c;
-}
-
-/**
- * Compare two strings, ignoring case, up to the given length. Returns 0 if the
- * strings are equal, a negative number if string1 is less than string2, or a
- * positive number if string1 is greater than string2.
- *
- * Note that this is effectively our own implementation of strncasecmp, but it's
- * not available on all of the platforms we want to support so we're rolling it
- * here.
- */
-int
-pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
- size_t offset = 0;
- int difference = 0;
-
- while (offset < length && string1[offset] != '\0') {
- if (string2[offset] == '\0') return string1[offset];
- if ((difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset])) != 0) return difference;
- offset++;
- }
-
- return difference;
-}
diff --git a/prism/util/pm_strncasecmp.h b/prism/util/pm_strncasecmp.h
deleted file mode 100644
index 5cb88cb5eb..0000000000
--- a/prism/util/pm_strncasecmp.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * @file pm_strncasecmp.h
- *
- * A custom strncasecmp implementation.
- */
-#ifndef PRISM_STRNCASECMP_H
-#define PRISM_STRNCASECMP_H
-
-#include "prism/defines.h"
-
-#include <ctype.h>
-#include <stddef.h>
-#include <stdint.h>
-
-/**
- * Compare two strings, ignoring case, up to the given length. Returns 0 if the
- * strings are equal, a negative number if string1 is less than string2, or a
- * positive number if string1 is greater than string2.
- *
- * Note that this is effectively our own implementation of strncasecmp, but it's
- * not available on all of the platforms we want to support so we're rolling it
- * here.
- *
- * @param string1 The first string to compare.
- * @param string2 The second string to compare
- * @param length The maximum number of characters to compare.
- * @return 0 if the strings are equal, a negative number if string1 is less than
- * string2, or a positive number if string1 is greater than string2.
- */
-int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
-
-#endif
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
deleted file mode 100644
index 916a4cc3fd..0000000000
--- a/prism/util/pm_strpbrk.c
+++ /dev/null
@@ -1,206 +0,0 @@
-#include "prism/util/pm_strpbrk.h"
-
-/**
- * Add an invalid multibyte character error to the parser.
- */
-static inline void
-pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
-}
-
-/**
- * Set the explicit encoding for the parser to the current encoding.
- */
-static inline void
-pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
- if (parser->explicit_encoding != NULL) {
- if (parser->explicit_encoding == parser->encoding) {
- // Okay, we already locked to this encoding.
- } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
- // Not okay, we already found a Unicode escape sequence and this
- // conflicts.
- pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
- } else {
- // Should not be anything else.
- assert(false && "unreachable");
- }
- }
-
- parser->explicit_encoding = parser->encoding;
-}
-
-/**
- * This is the default path.
- */
-static inline const uint8_t *
-pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (source[index] < 0x80) {
- index++;
- } else {
- size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
-
- if (width > 0) {
- index += width;
- } else if (!validate) {
- index++;
- } else {
- // At this point we know we have an invalid multibyte character.
- // We'll walk forward as far as we can until we find the next
- // valid character so that we don't spam the user with a ton of
- // the same kind of error.
- const size_t start = index;
-
- do {
- index++;
- } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
- pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
- }
- }
- }
-
- return NULL;
-}
-
-/**
- * This is the path when the encoding is ASCII-8BIT.
- */
-static inline const uint8_t *
-pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
- index++;
- }
-
- return NULL;
-}
-
-/**
- * This is the slow path that does care about the encoding.
- */
-static inline const uint8_t *
-pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
- const pm_encoding_t *encoding = parser->encoding;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (source[index] < 0x80) {
- index++;
- } else {
- size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
- if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
-
- if (width > 0) {
- index += width;
- } else if (!validate) {
- index++;
- } else {
- // At this point we know we have an invalid multibyte character.
- // We'll walk forward as far as we can until we find the next
- // valid character so that we don't spam the user with a ton of
- // the same kind of error.
- const size_t start = index;
-
- do {
- index++;
- } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
- pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
- }
- }
- }
-
- return NULL;
-}
-
-/**
- * This is the fast path that does not care about the encoding because we know
- * the encoding only supports single-byte characters.
- */
-static inline const uint8_t *
-pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
- const pm_encoding_t *encoding = parser->encoding;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (source[index] < 0x80 || !validate) {
- index++;
- } else {
- size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
- pm_strpbrk_explicit_encoding_set(parser, source, width);
-
- if (width > 0) {
- index += width;
- } else {
- // At this point we know we have an invalid multibyte character.
- // We'll walk forward as far as we can until we find the next
- // valid character so that we don't spam the user with a ton of
- // the same kind of error.
- const size_t start = index;
-
- do {
- index++;
- } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
- pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
- }
- }
- }
-
- return NULL;
-}
-
-/**
- * Here we have rolled our own version of strpbrk. The standard library strpbrk
- * has undefined behavior when the source string is not null-terminated. We want
- * to support strings that are not null-terminated because pm_parse does not
- * have the contract that the string is null-terminated. (This is desirable
- * because it means the extension can call pm_parse with the result of a call to
- * mmap).
- *
- * The standard library strpbrk also does not support passing a maximum length
- * to search. We want to support this for the reason mentioned above, but we
- * also don't want it to stop on null bytes. Ruby actually allows null bytes
- * within strings, comments, regular expressions, etc. So we need to be able to
- * skip past them.
- *
- * Finally, we want to support encodings wherein the charset could contain
- * characters that are trailing bytes of multi-byte characters. For example, in
- * Shift_JIS, the backslash character can be a trailing byte. In that case we
- * need to take a slower path and iterate one multi-byte character at a time.
- */
-const uint8_t *
-pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
- if (length <= 0) {
- return NULL;
- } else if (!parser->encoding_changed) {
- return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
- } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
- return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
- } else if (parser->encoding->multibyte) {
- return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
- } else {
- return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
- }
-}
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
deleted file mode 100644
index f387bd5782..0000000000
--- a/prism/util/pm_strpbrk.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * @file pm_strpbrk.h
- *
- * A custom strpbrk implementation.
- */
-#ifndef PRISM_STRPBRK_H
-#define PRISM_STRPBRK_H
-
-#include "prism/defines.h"
-#include "prism/diagnostic.h"
-#include "prism/parser.h"
-
-#include <stddef.h>
-#include <string.h>
-
-/**
- * Here we have rolled our own version of strpbrk. The standard library strpbrk
- * has undefined behavior when the source string is not null-terminated. We want
- * to support strings that are not null-terminated because pm_parse does not
- * have the contract that the string is null-terminated. (This is desirable
- * because it means the extension can call pm_parse with the result of a call to
- * mmap).
- *
- * The standard library strpbrk also does not support passing a maximum length
- * to search. We want to support this for the reason mentioned above, but we
- * also don't want it to stop on null bytes. Ruby actually allows null bytes
- * within strings, comments, regular expressions, etc. So we need to be able to
- * skip past them.
- *
- * Finally, we want to support encodings wherein the charset could contain
- * characters that are trailing bytes of multi-byte characters. For example, in
- * Shift-JIS, the backslash character can be a trailing byte. In that case we
- * need to take a slower path and iterate one multi-byte character at a time.
- *
- * @param parser The parser.
- * @param source The source to search.
- * @param charset The charset to search for.
- * @param length The maximum number of bytes to search.
- * @param validate Whether to validate that the source string is valid in the
- * current encoding of the parser.
- * @return A pointer to the first character in the source string that is in the
- * charset, or NULL if no such character exists.
- */
-const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
-
-#endif