diff options
Diffstat (limited to 'prism/util')
| -rw-r--r-- | prism/util/pm_buffer.c | 357 | ||||
| -rw-r--r-- | prism/util/pm_buffer.h | 228 | ||||
| -rw-r--r-- | prism/util/pm_char.c | 318 | ||||
| -rw-r--r-- | prism/util/pm_char.h | 204 | ||||
| -rw-r--r-- | prism/util/pm_constant_pool.c | 342 | ||||
| -rw-r--r-- | prism/util/pm_constant_pool.h | 218 | ||||
| -rw-r--r-- | prism/util/pm_integer.c | 670 | ||||
| -rw-r--r-- | prism/util/pm_integer.h | 126 | ||||
| -rw-r--r-- | prism/util/pm_list.c | 49 | ||||
| -rw-r--r-- | prism/util/pm_list.h | 97 | ||||
| -rw-r--r-- | prism/util/pm_memchr.c | 35 | ||||
| -rw-r--r-- | prism/util/pm_memchr.h | 29 | ||||
| -rw-r--r-- | prism/util/pm_newline_list.c | 125 | ||||
| -rw-r--r-- | prism/util/pm_newline_list.h | 113 | ||||
| -rw-r--r-- | prism/util/pm_string.c | 383 | ||||
| -rw-r--r-- | prism/util/pm_string.h | 190 | ||||
| -rw-r--r-- | prism/util/pm_strncasecmp.c | 36 | ||||
| -rw-r--r-- | prism/util/pm_strncasecmp.h | 32 | ||||
| -rw-r--r-- | prism/util/pm_strpbrk.c | 206 | ||||
| -rw-r--r-- | prism/util/pm_strpbrk.h | 46 |
20 files changed, 0 insertions, 3804 deletions
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c deleted file mode 100644 index 2136a7c43e..0000000000 --- a/prism/util/pm_buffer.c +++ /dev/null @@ -1,357 +0,0 @@ -#include "prism/util/pm_buffer.h" - -/** - * Return the size of the pm_buffer_t struct. - */ -size_t -pm_buffer_sizeof(void) { - return sizeof(pm_buffer_t); -} - -/** - * Initialize a pm_buffer_t with the given capacity. - */ -bool -pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) { - buffer->length = 0; - buffer->capacity = capacity; - - buffer->value = (char *) xmalloc(capacity); - return buffer->value != NULL; -} - -/** - * Initialize a pm_buffer_t with its default values. - */ -bool -pm_buffer_init(pm_buffer_t *buffer) { - return pm_buffer_init_capacity(buffer, 1024); -} - -/** - * Return the value of the buffer. - */ -char * -pm_buffer_value(const pm_buffer_t *buffer) { - return buffer->value; -} - -/** - * Return the length of the buffer. - */ -size_t -pm_buffer_length(const pm_buffer_t *buffer) { - return buffer->length; -} - -/** - * Append the given amount of space to the buffer. - */ -static inline bool -pm_buffer_append_length(pm_buffer_t *buffer, size_t length) { - size_t next_length = buffer->length + length; - - if (next_length > buffer->capacity) { - if (buffer->capacity == 0) { - buffer->capacity = 1; - } - - while (next_length > buffer->capacity) { - buffer->capacity *= 2; - } - - buffer->value = xrealloc(buffer->value, buffer->capacity); - if (buffer->value == NULL) return false; - } - - buffer->length = next_length; - return true; -} - -/** - * Append a generic pointer to memory to the buffer. - */ -static inline void -pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) { - size_t cursor = buffer->length; - if (pm_buffer_append_length(buffer, length)) { - memcpy(buffer->value + cursor, source, length); - } -} - -/** - * Append the given amount of space as zeroes to the buffer. - */ -void -pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length) { - size_t cursor = buffer->length; - if (pm_buffer_append_length(buffer, length)) { - memset(buffer->value + cursor, 0, length); - } -} - -/** - * Append a formatted string to the buffer. - */ -void -pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) { - va_list arguments; - va_start(arguments, format); - int result = vsnprintf(NULL, 0, format, arguments); - va_end(arguments); - - if (result < 0) return; - size_t length = (size_t) (result + 1); - - size_t cursor = buffer->length; - if (pm_buffer_append_length(buffer, length)) { - va_start(arguments, format); - vsnprintf(buffer->value + cursor, length, format, arguments); - va_end(arguments); - buffer->length--; - } -} - -/** - * Append a string to the buffer. - */ -void -pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length) { - pm_buffer_append(buffer, value, length); -} - -/** - * Append a list of bytes to the buffer. - */ -void -pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length) { - pm_buffer_append(buffer, (const char *) value, length); -} - -/** - * Append a single byte to the buffer. - */ -void -pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value) { - const void *source = &value; - pm_buffer_append(buffer, source, sizeof(uint8_t)); -} - -/** - * Append a 32-bit unsigned integer to the buffer as a variable-length integer. - */ -void -pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) { - if (value < 128) { - pm_buffer_append_byte(buffer, (uint8_t) value); - } else { - uint32_t n = value; - while (n >= 128) { - pm_buffer_append_byte(buffer, (uint8_t) (n | 128)); - n >>= 7; - } - pm_buffer_append_byte(buffer, (uint8_t) n); - } -} - -/** - * Append a 32-bit signed integer to the buffer as a variable-length integer. - */ -void -pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) { - uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31)); - pm_buffer_append_varuint(buffer, unsigned_int); -} - -/** - * Append a double to the buffer. - */ -void -pm_buffer_append_double(pm_buffer_t *buffer, double value) { - const void *source = &value; - pm_buffer_append(buffer, source, sizeof(double)); -} - -/** - * Append a unicode codepoint to the buffer. - */ -bool -pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value) { - if (value <= 0x7F) { - pm_buffer_append_byte(buffer, (uint8_t) value); // 0xxxxxxx - return true; - } else if (value <= 0x7FF) { - uint8_t bytes[] = { - (uint8_t) (0xC0 | ((value >> 6) & 0x3F)), // 110xxxxx - (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx - }; - - pm_buffer_append_bytes(buffer, bytes, 2); - return true; - } else if (value <= 0xFFFF) { - uint8_t bytes[] = { - (uint8_t) (0xE0 | ((value >> 12) & 0x3F)), // 1110xxxx - (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx - (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx - }; - - pm_buffer_append_bytes(buffer, bytes, 3); - return true; - } else if (value <= 0x10FFFF) { - uint8_t bytes[] = { - (uint8_t) (0xF0 | ((value >> 18) & 0x3F)), // 11110xxx - (uint8_t) (0x80 | ((value >> 12) & 0x3F)), // 10xxxxxx - (uint8_t) (0x80 | ((value >> 6) & 0x3F)), // 10xxxxxx - (uint8_t) (0x80 | (value & 0x3F)) // 10xxxxxx - }; - - pm_buffer_append_bytes(buffer, bytes, 4); - return true; - } else { - return false; - } -} - -/** - * Append a slice of source code to the buffer. - */ -void -pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) { - for (size_t index = 0; index < length; index++) { - const uint8_t byte = source[index]; - - if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) { - if (escaping == PM_BUFFER_ESCAPING_RUBY) { - pm_buffer_append_format(buffer, "\\x%02X", byte); - } else { - pm_buffer_append_format(buffer, "\\u%04X", byte); - } - } else { - switch (byte) { - case '\a': - if (escaping == PM_BUFFER_ESCAPING_RUBY) { - pm_buffer_append_string(buffer, "\\a", 2); - } else { - pm_buffer_append_format(buffer, "\\u%04X", byte); - } - break; - case '\b': - pm_buffer_append_string(buffer, "\\b", 2); - break; - case '\t': - pm_buffer_append_string(buffer, "\\t", 2); - break; - case '\n': - pm_buffer_append_string(buffer, "\\n", 2); - break; - case '\v': - if (escaping == PM_BUFFER_ESCAPING_RUBY) { - pm_buffer_append_string(buffer, "\\v", 2); - } else { - pm_buffer_append_format(buffer, "\\u%04X", byte); - } - break; - case '\f': - pm_buffer_append_string(buffer, "\\f", 2); - break; - case '\r': - pm_buffer_append_string(buffer, "\\r", 2); - break; - case '"': - pm_buffer_append_string(buffer, "\\\"", 2); - break; - case '#': { - if (escaping == PM_BUFFER_ESCAPING_RUBY && index + 1 < length) { - const uint8_t next_byte = source[index + 1]; - if (next_byte == '{' || next_byte == '@' || next_byte == '$') { - pm_buffer_append_byte(buffer, '\\'); - } - } - - pm_buffer_append_byte(buffer, '#'); - break; - } - case '\\': - pm_buffer_append_string(buffer, "\\\\", 2); - break; - default: - pm_buffer_append_byte(buffer, byte); - break; - } - } - } -} - -/** - * Prepend the given string to the buffer. - */ -void -pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) { - size_t cursor = buffer->length; - if (pm_buffer_append_length(buffer, length)) { - memmove(buffer->value + length, buffer->value, cursor); - memcpy(buffer->value, value, length); - } -} - -/** - * Concatenate one buffer onto another. - */ -void -pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source) { - if (source->length > 0) { - pm_buffer_append(destination, source->value, source->length); - } -} - -/** - * Clear the buffer by reducing its size to 0. This does not free the allocated - * memory, but it does allow the buffer to be reused. - */ -void -pm_buffer_clear(pm_buffer_t *buffer) { - buffer->length = 0; -} - -/** - * Strip the whitespace from the end of the buffer. - */ -void -pm_buffer_rstrip(pm_buffer_t *buffer) { - while (buffer->length > 0 && pm_char_is_whitespace((uint8_t) buffer->value[buffer->length - 1])) { - buffer->length--; - } -} - -/** - * Checks if the buffer includes the given value. - */ -size_t -pm_buffer_index(const pm_buffer_t *buffer, char value) { - const char *first = memchr(buffer->value, value, buffer->length); - return (first == NULL) ? SIZE_MAX : (size_t) (first - buffer->value); -} - -/** - * Insert the given string into the buffer at the given index. - */ -void -pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length) { - assert(index <= buffer->length); - - if (index == buffer->length) { - pm_buffer_append_string(buffer, value, length); - } else { - pm_buffer_append_zeroes(buffer, length); - memmove(buffer->value + index + length, buffer->value + index, buffer->length - length - index); - memcpy(buffer->value + index, value, length); - } -} - -/** - * Free the memory associated with the buffer. - */ -void -pm_buffer_free(pm_buffer_t *buffer) { - xfree(buffer->value); -} diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h deleted file mode 100644 index f3c20ab2a5..0000000000 --- a/prism/util/pm_buffer.h +++ /dev/null @@ -1,228 +0,0 @@ -/** - * @file pm_buffer.h - * - * A wrapper around a contiguous block of allocated memory. - */ -#ifndef PRISM_BUFFER_H -#define PRISM_BUFFER_H - -#include "prism/defines.h" -#include "prism/util/pm_char.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -/** - * A pm_buffer_t is a simple memory buffer that stores data in a contiguous - * block of memory. - */ -typedef struct { - /** The length of the buffer in bytes. */ - size_t length; - - /** The capacity of the buffer in bytes that has been allocated. */ - size_t capacity; - - /** A pointer to the start of the buffer. */ - char *value; -} pm_buffer_t; - -/** - * Return the size of the pm_buffer_t struct. - * - * @returns The size of the pm_buffer_t struct. - */ -PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void); - -/** - * Initialize a pm_buffer_t with the given capacity. - * - * @param buffer The buffer to initialize. - * @param capacity The capacity of the buffer. - * @returns True if the buffer was initialized successfully, false otherwise. - */ -bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity); - -/** - * Initialize a pm_buffer_t with its default values. - * - * @param buffer The buffer to initialize. - * @returns True if the buffer was initialized successfully, false otherwise. - */ -PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer); - -/** - * Return the value of the buffer. - * - * @param buffer The buffer to get the value of. - * @returns The value of the buffer. - */ -PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer); - -/** - * Return the length of the buffer. - * - * @param buffer The buffer to get the length of. - * @returns The length of the buffer. - */ -PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer); - -/** - * Append the given amount of space as zeroes to the buffer. - * - * @param buffer The buffer to append to. - * @param length The amount of space to append and zero. - */ -void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length); - -/** - * Append a formatted string to the buffer. - * - * @param buffer The buffer to append to. - * @param format The format string to append. - * @param ... The arguments to the format string. - */ -void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3); - -/** - * Append a string to the buffer. - * - * @param buffer The buffer to append to. - * @param value The string to append. - * @param length The length of the string to append. - */ -void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length); - -/** - * Append a list of bytes to the buffer. - * - * @param buffer The buffer to append to. - * @param value The bytes to append. - * @param length The length of the bytes to append. - */ -void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length); - -/** - * Append a single byte to the buffer. - * - * @param buffer The buffer to append to. - * @param value The byte to append. - */ -void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value); - -/** - * Append a 32-bit unsigned integer to the buffer as a variable-length integer. - * - * @param buffer The buffer to append to. - * @param value The integer to append. - */ -void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value); - -/** - * Append a 32-bit signed integer to the buffer as a variable-length integer. - * - * @param buffer The buffer to append to. - * @param value The integer to append. - */ -void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value); - -/** - * Append a double to the buffer. - * - * @param buffer The buffer to append to. - * @param value The double to append. - */ -void pm_buffer_append_double(pm_buffer_t *buffer, double value); - -/** - * Append a unicode codepoint to the buffer. - * - * @param buffer The buffer to append to. - * @param value The character to append. - * @returns True if the codepoint was valid and appended successfully, false - * otherwise. - */ -bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value); - -/** - * The different types of escaping that can be performed by the buffer when - * appending a slice of Ruby source code. - */ -typedef enum { - PM_BUFFER_ESCAPING_RUBY, - PM_BUFFER_ESCAPING_JSON -} pm_buffer_escaping_t; - -/** - * Append a slice of source code to the buffer. - * - * @param buffer The buffer to append to. - * @param source The source code to append. - * @param length The length of the source code to append. - * @param escaping The type of escaping to perform. - */ -void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping); - -/** - * Prepend the given string to the buffer. - * - * @param buffer The buffer to prepend to. - * @param value The string to prepend. - * @param length The length of the string to prepend. - */ -void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length); - -/** - * Concatenate one buffer onto another. - * - * @param destination The buffer to concatenate onto. - * @param source The buffer to concatenate. - */ -void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source); - -/** - * Clear the buffer by reducing its size to 0. This does not free the allocated - * memory, but it does allow the buffer to be reused. - * - * @param buffer The buffer to clear. - */ -void pm_buffer_clear(pm_buffer_t *buffer); - -/** - * Strip the whitespace from the end of the buffer. - * - * @param buffer The buffer to strip. - */ -void pm_buffer_rstrip(pm_buffer_t *buffer); - -/** - * Checks if the buffer includes the given value. - * - * @param buffer The buffer to check. - * @param value The value to check for. - * @returns The index of the first occurrence of the value in the buffer, or - * SIZE_MAX if the value is not found. - */ -size_t pm_buffer_index(const pm_buffer_t *buffer, char value); - -/** - * Insert the given string into the buffer at the given index. - * - * @param buffer The buffer to insert into. - * @param index The index to insert at. - * @param value The string to insert. - * @param length The length of the string to insert. - */ -void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length); - -/** - * Free the memory associated with the buffer. - * - * @param buffer The buffer to free. - */ -PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer); - -#endif diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c deleted file mode 100644 index a51dc11645..0000000000 --- a/prism/util/pm_char.c +++ /dev/null @@ -1,318 +0,0 @@ -#include "prism/util/pm_char.h" - -#define PRISM_CHAR_BIT_WHITESPACE (1 << 0) -#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1) -#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2) - -#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0) -#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1) -#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2) -#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3) -#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4) -#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5) -#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6) -#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7) - -static const uint8_t pm_byte_table[256] = { -// 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x - 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 4x - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 5x - 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 6x - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, // 7x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx -}; - -static const uint8_t pm_number_table[256] = { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x - 0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x - 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x - 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx -}; - -/** - * Returns the number of characters at the start of the string that match the - * given kind. Disallows searching past the given maximum number of characters. - */ -static inline size_t -pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { - if (length <= 0) return 0; - - size_t size = 0; - size_t maximum = (size_t) length; - - while (size < maximum && (pm_byte_table[string[size]] & kind)) size++; - return size; -} - -/** - * Returns the number of characters at the start of the string that are - * whitespace. Disallows searching past the given maximum number of characters. - */ -size_t -pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) { - return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_WHITESPACE); -} - -/** - * Returns the number of characters at the start of the string that are - * whitespace while also tracking the location of each newline. Disallows - * searching past the given maximum number of characters. - */ -size_t -pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) { - if (length <= 0) return 0; - - size_t size = 0; - size_t maximum = (size_t) length; - - while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) { - if (string[size] == '\n') { - pm_newline_list_append(newline_list, string + size); - } - - size++; - } - - return size; -} - -/** - * Returns the number of characters at the start of the string that are inline - * whitespace. Disallows searching past the given maximum number of characters. - */ -size_t -pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { - return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE); -} - -/** - * Returns the number of characters at the start of the string that are regexp - * options. Disallows searching past the given maximum number of characters. - */ -size_t -pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) { - return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION); -} - -/** - * Returns true if the given character matches the given kind. - */ -static inline bool -pm_char_is_char_kind(const uint8_t b, uint8_t kind) { - return (pm_byte_table[b] & kind) != 0; -} - -/** - * Returns true if the given character is a whitespace character. - */ -bool -pm_char_is_whitespace(const uint8_t b) { - return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE); -} - -/** - * Returns true if the given character is an inline whitespace character. - */ -bool -pm_char_is_inline_whitespace(const uint8_t b) { - return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE); -} - -/** - * Scan through the string and return the number of characters at the start of - * the string that match the given kind. Disallows searching past the given - * maximum number of characters. - */ -static inline size_t -pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { - if (length <= 0) return 0; - - size_t size = 0; - size_t maximum = (size_t) length; - - while (size < maximum && (pm_number_table[string[size]] & kind)) size++; - return size; -} - -/** - * Scan through the string and return the number of characters at the start of - * the string that match the given kind. Disallows searching past the given - * maximum number of characters. - * - * Additionally, report the location of the last invalid underscore character - * found in the string through the out invalid parameter. - */ -static inline size_t -pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) { - if (length <= 0) return 0; - - size_t size = 0; - size_t maximum = (size_t) length; - - bool underscore = false; - while (size < maximum && (pm_number_table[string[size]] & kind)) { - if (string[size] == '_') { - if (underscore) *invalid = string + size; - underscore = true; - } else { - underscore = false; - } - - size++; - } - - if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1; - return size; -} - -/** - * Returns the number of characters at the start of the string that are binary - * digits or underscores. Disallows searching past the given maximum number of - * characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - */ -size_t -pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { - return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_BINARY_NUMBER); -} - -/** - * Returns the number of characters at the start of the string that are octal - * digits or underscores. Disallows searching past the given maximum number of - * characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - */ -size_t -pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { - return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_OCTAL_NUMBER); -} - -/** - * Returns the number of characters at the start of the string that are decimal - * digits. Disallows searching past the given maximum number of characters. - */ -size_t -pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) { - return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_DECIMAL_DIGIT); -} - -/** - * Returns the number of characters at the start of the string that are decimal - * digits or underscores. Disallows searching past the given maximum number of - * characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore - */ -size_t -pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { - return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_DECIMAL_NUMBER); -} - -/** - * Returns the number of characters at the start of the string that are - * hexadecimal digits. Disallows searching past the given maximum number of - * characters. - */ -size_t -pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) { - return pm_strspn_number_kind(string, length, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT); -} - -/** - * Returns the number of characters at the start of the string that are - * hexadecimal digits or underscores. Disallows searching past the given maximum - * number of characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - */ -size_t -pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { - return pm_strspn_number_kind_underscores(string, length, invalid, PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER); -} - -/** - * Returns true if the given character matches the given kind. - */ -static inline bool -pm_char_is_number_kind(const uint8_t b, uint8_t kind) { - return (pm_number_table[b] & kind) != 0; -} - -/** - * Returns true if the given character is a binary digit. - */ -bool -pm_char_is_binary_digit(const uint8_t b) { - return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_BINARY_DIGIT); -} - -/** - * Returns true if the given character is an octal digit. - */ -bool -pm_char_is_octal_digit(const uint8_t b) { - return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_OCTAL_DIGIT); -} - -/** - * Returns true if the given character is a decimal digit. - */ -bool -pm_char_is_decimal_digit(const uint8_t b) { - return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_DECIMAL_DIGIT); -} - -/** - * Returns true if the given character is a hexadecimal digit. - */ -bool -pm_char_is_hexadecimal_digit(const uint8_t b) { - return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT); -} - -#undef PRISM_CHAR_BIT_WHITESPACE -#undef PRISM_CHAR_BIT_INLINE_WHITESPACE -#undef PRISM_CHAR_BIT_REGEXP_OPTION - -#undef PRISM_NUMBER_BIT_BINARY_DIGIT -#undef PRISM_NUMBER_BIT_BINARY_NUMBER -#undef PRISM_NUMBER_BIT_OCTAL_DIGIT -#undef PRISM_NUMBER_BIT_OCTAL_NUMBER -#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT -#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER -#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER -#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h deleted file mode 100644 index deeafd6321..0000000000 --- a/prism/util/pm_char.h +++ /dev/null @@ -1,204 +0,0 @@ -/** - * @file pm_char.h - * - * Functions for working with characters and strings. - */ -#ifndef PRISM_CHAR_H -#define PRISM_CHAR_H - -#include "prism/defines.h" -#include "prism/util/pm_newline_list.h" - -#include <stdbool.h> -#include <stddef.h> - -/** - * Returns the number of characters at the start of the string that are - * whitespace. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are - * whitespace. - */ -size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length); - -/** - * Returns the number of characters at the start of the string that are - * whitespace while also tracking the location of each newline. Disallows - * searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param newline_list The list of newlines to populate. - * @return The number of characters at the start of the string that are - * whitespace. - */ -size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list); - -/** - * Returns the number of characters at the start of the string that are inline - * whitespace. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are inline - * whitespace. - */ -size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length); - -/** - * Returns the number of characters at the start of the string that are decimal - * digits. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are decimal - * digits. - */ -size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length); - -/** - * Returns the number of characters at the start of the string that are - * hexadecimal digits. Disallows searching past the given maximum number of - * characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are - * hexadecimal digits. - */ -size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length); - -/** - * Returns the number of characters at the start of the string that are octal - * digits or underscores. Disallows searching past the given maximum number of - * characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are octal - * digits or underscores. - */ -size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); - -/** - * Returns the number of characters at the start of the string that are decimal - * digits or underscores. Disallows searching past the given maximum number of - * characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are decimal - * digits or underscores. - */ -size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); - -/** - * Returns the number of characters at the start of the string that are - * hexadecimal digits or underscores. Disallows searching past the given maximum - * number of characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are - * hexadecimal digits or underscores. - */ -size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); - -/** - * Returns the number of characters at the start of the string that are regexp - * options. Disallows searching past the given maximum number of characters. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @return The number of characters at the start of the string that are regexp - * options. - */ -size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); - -/** - * Returns the number of characters at the start of the string that are binary - * digits or underscores. Disallows searching past the given maximum number of - * characters. - * - * If multiple underscores are found in a row or if an underscore is - * found at the end of the number, then the invalid pointer is set to the index - * of the first invalid underscore. - * - * @param string The string to search. - * @param length The maximum number of characters to search. - * @param invalid The pointer to set to the index of the first invalid - * underscore. - * @return The number of characters at the start of the string that are binary - * digits or underscores. - */ -size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); - -/** - * Returns true if the given character is a whitespace character. - * - * @param b The character to check. - * @return True if the given character is a whitespace character. - */ -bool pm_char_is_whitespace(const uint8_t b); - -/** - * Returns true if the given character is an inline whitespace character. - * - * @param b The character to check. - * @return True if the given character is an inline whitespace character. - */ -bool pm_char_is_inline_whitespace(const uint8_t b); - -/** - * Returns true if the given character is a binary digit. - * - * @param b The character to check. - * @return True if the given character is a binary digit. - */ -bool pm_char_is_binary_digit(const uint8_t b); - -/** - * Returns true if the given character is an octal digit. - * - * @param b The character to check. - * @return True if the given character is an octal digit. - */ -bool pm_char_is_octal_digit(const uint8_t b); - -/** - * Returns true if the given character is a decimal digit. - * - * @param b The character to check. - * @return True if the given character is a decimal digit. - */ -bool pm_char_is_decimal_digit(const uint8_t b); - -/** - * Returns true if the given character is a hexadecimal digit. - * - * @param b The character to check. - * @return True if the given character is a hexadecimal digit. - */ -bool pm_char_is_hexadecimal_digit(const uint8_t b); - -#endif diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c deleted file mode 100644 index 38ea01a228..0000000000 --- a/prism/util/pm_constant_pool.c +++ /dev/null @@ -1,342 +0,0 @@ -#include "prism/util/pm_constant_pool.h" - -/** - * Initialize a list of constant ids. - */ -void -pm_constant_id_list_init(pm_constant_id_list_t *list) { - list->ids = NULL; - list->size = 0; - list->capacity = 0; -} - -/** - * Initialize a list of constant ids with a given capacity. - */ -void -pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) { - if (capacity) { - list->ids = xcalloc(capacity, sizeof(pm_constant_id_t)); - if (list->ids == NULL) abort(); - } else { - list->ids = NULL; - } - - list->size = 0; - list->capacity = capacity; -} - -/** - * Append a constant id to a list of constant ids. Returns false if any - * potential reallocations fail. - */ -bool -pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) { - if (list->size >= list->capacity) { - list->capacity = list->capacity == 0 ? 8 : list->capacity * 2; - list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity); - if (list->ids == NULL) return false; - } - - list->ids[list->size++] = id; - return true; -} - -/** - * Insert a constant id into a list of constant ids at the specified index. - */ -void -pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) { - assert(index < list->capacity); - assert(list->ids[index] == PM_CONSTANT_ID_UNSET); - - list->ids[index] = id; - list->size++; -} - -/** - * Checks if the current constant id list includes the given constant id. - */ -bool -pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) { - for (size_t index = 0; index < list->size; index++) { - if (list->ids[index] == id) return true; - } - return false; -} - -/** - * Free the memory associated with a list of constant ids. - */ -void -pm_constant_id_list_free(pm_constant_id_list_t *list) { - if (list->ids != NULL) { - xfree(list->ids); - } -} - -/** - * A relatively simple hash function (djb2) that is used to hash strings. We are - * optimizing here for simplicity and speed. - */ -static inline uint32_t -pm_constant_pool_hash(const uint8_t *start, size_t length) { - // This is a prime number used as the initial value for the hash function. - uint32_t value = 5381; - - for (size_t index = 0; index < length; index++) { - value = ((value << 5) + value) + start[index]; - } - - return value; -} - -/** - * https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - */ -static uint32_t -next_power_of_two(uint32_t v) { - // Avoid underflow in subtraction on next line. - if (v == 0) { - // 1 is the nearest power of 2 to 0 (2^0) - return 1; - } - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v; -} - -#ifndef NDEBUG -static bool -is_power_of_two(uint32_t size) { - return (size & (size - 1)) == 0; -} -#endif - -/** - * Resize a constant pool to a given capacity. - */ -static inline bool -pm_constant_pool_resize(pm_constant_pool_t *pool) { - assert(is_power_of_two(pool->capacity)); - - uint32_t next_capacity = pool->capacity * 2; - if (next_capacity < pool->capacity) return false; - - const uint32_t mask = next_capacity - 1; - const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t); - - void *next = xcalloc(next_capacity, element_size); - if (next == NULL) return false; - - pm_constant_pool_bucket_t *next_buckets = next; - pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t)); - - // For each bucket in the current constant pool, find the index in the - // next constant pool, and insert it. - for (uint32_t index = 0; index < pool->capacity; index++) { - pm_constant_pool_bucket_t *bucket = &pool->buckets[index]; - - // If an id is set on this constant, then we know we have content here. - // In this case we need to insert it into the next constant pool. - if (bucket->id != PM_CONSTANT_ID_UNSET) { - uint32_t next_index = bucket->hash & mask; - - // This implements linear scanning to find the next available slot - // in case this index is already taken. We don't need to bother - // comparing the values since we know that the hash is unique. - while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) { - next_index = (next_index + 1) & mask; - } - - // Here we copy over the entire bucket, which includes the id so - // that they are consistent between resizes. - next_buckets[next_index] = *bucket; - } - } - - // The constants are stable with respect to hash table resizes. - memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t)); - - // pool->constants and pool->buckets are allocated out of the same chunk - // of memory, with the buckets coming first. - xfree(pool->buckets); - pool->constants = next_constants; - pool->buckets = next_buckets; - pool->capacity = next_capacity; - return true; -} - -/** - * Initialize a new constant pool with a given capacity. - */ -bool -pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) { - const uint32_t maximum = (~((uint32_t) 0)); - if (capacity >= ((maximum / 2) + 1)) return false; - - capacity = next_power_of_two(capacity); - const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t); - void *memory = xcalloc(capacity, element_size); - if (memory == NULL) return false; - - pool->buckets = memory; - pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t)); - pool->size = 0; - pool->capacity = capacity; - return true; -} - -/** - * Return a pointer to the constant indicated by the given constant id. - */ -pm_constant_t * -pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) { - assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size); - return &pool->constants[constant_id - 1]; -} - -/** - * Find a constant in a constant pool. Returns the id of the constant, or 0 if - * the constant is not found. - */ -pm_constant_id_t -pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) { - assert(is_power_of_two(pool->capacity)); - const uint32_t mask = pool->capacity - 1; - - uint32_t hash = pm_constant_pool_hash(start, length); - uint32_t index = hash & mask; - pm_constant_pool_bucket_t *bucket; - - while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) { - pm_constant_t *constant = &pool->constants[bucket->id - 1]; - if ((constant->length == length) && memcmp(constant->start, start, length) == 0) { - return bucket->id; - } - - index = (index + 1) & mask; - } - - return PM_CONSTANT_ID_UNSET; -} - -/** - * Insert a constant into a constant pool and return its index in the pool. - */ -static inline pm_constant_id_t -pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) { - if (pool->size >= (pool->capacity / 4 * 3)) { - if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET; - } - - assert(is_power_of_two(pool->capacity)); - const uint32_t mask = pool->capacity - 1; - - uint32_t hash = pm_constant_pool_hash(start, length); - uint32_t index = hash & mask; - pm_constant_pool_bucket_t *bucket; - - while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) { - // If there is a collision, then we need to check if the content is the - // same as the content we are trying to insert. If it is, then we can - // return the id of the existing constant. - pm_constant_t *constant = &pool->constants[bucket->id - 1]; - - if ((constant->length == length) && memcmp(constant->start, start, length) == 0) { - // Since we have found a match, we need to check if this is - // attempting to insert a shared or an owned constant. We want to - // prefer shared constants since they don't require allocations. - if (type == PM_CONSTANT_POOL_BUCKET_OWNED) { - // If we're attempting to insert an owned constant and we have - // an existing constant, then either way we don't want the given - // memory. Either it's duplicated with the existing constant or - // it's not necessary because we have a shared version. - xfree((void *) start); - } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) { - // If we're attempting to insert a shared constant and the - // existing constant is owned, then we can free the owned - // constant and replace it with the shared constant. - xfree((void *) constant->start); - constant->start = start; - bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3); - } - - return bucket->id; - } - - index = (index + 1) & mask; - } - - // IDs are allocated starting at 1, since the value 0 denotes a non-existent - // constant. - uint32_t id = ++pool->size; - assert(pool->size < ((uint32_t) (1 << 30))); - - *bucket = (pm_constant_pool_bucket_t) { - .id = (unsigned int) (id & 0x3fffffff), - .type = (unsigned int) (type & 0x3), - .hash = hash - }; - - pool->constants[id - 1] = (pm_constant_t) { - .start = start, - .length = length, - }; - - return id; -} - -/** - * Insert a constant into a constant pool. Returns the id of the constant, or - * PM_CONSTANT_ID_UNSET if any potential calls to resize fail. - */ -pm_constant_id_t -pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) { - return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT); -} - -/** - * Insert a constant into a constant pool from memory that is now owned by the - * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any - * potential calls to resize fail. - */ -pm_constant_id_t -pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) { - return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED); -} - -/** - * Insert a constant into a constant pool from memory that is constant. Returns - * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to - * resize fail. - */ -pm_constant_id_t -pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) { - return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT); -} - -/** - * Free the memory associated with a constant pool. - */ -void -pm_constant_pool_free(pm_constant_pool_t *pool) { - // For each constant in the current constant pool, free the contents if the - // contents are owned. - for (uint32_t index = 0; index < pool->capacity; index++) { - pm_constant_pool_bucket_t *bucket = &pool->buckets[index]; - - // If an id is set on this constant, then we know we have content here. - if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) { - pm_constant_t *constant = &pool->constants[bucket->id - 1]; - xfree((void *) constant->start); - } - } - - xfree(pool->buckets); -} diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h deleted file mode 100644 index 6df23f8f50..0000000000 --- a/prism/util/pm_constant_pool.h +++ /dev/null @@ -1,218 +0,0 @@ -/** - * @file pm_constant_pool.h - * - * A data structure that stores a set of strings. - * - * Each string is assigned a unique id, which can be used to compare strings for - * equality. This comparison ends up being much faster than strcmp, since it - * only requires a single integer comparison. - */ -#ifndef PRISM_CONSTANT_POOL_H -#define PRISM_CONSTANT_POOL_H - -#include "prism/defines.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -/** - * When we allocate constants into the pool, we reserve 0 to mean that the slot - * is not yet filled. This constant is reused in other places to indicate the - * lack of a constant id. - */ -#define PM_CONSTANT_ID_UNSET 0 - -/** - * A constant id is a unique identifier for a constant in the constant pool. - */ -typedef uint32_t pm_constant_id_t; - -/** - * A list of constant IDs. Usually used to represent a set of locals. - */ -typedef struct { - /** The number of constant ids in the list. */ - size_t size; - - /** The number of constant ids that have been allocated in the list. */ - size_t capacity; - - /** The constant ids in the list. */ - pm_constant_id_t *ids; -} pm_constant_id_list_t; - -/** - * Initialize a list of constant ids. - * - * @param list The list to initialize. - */ -void pm_constant_id_list_init(pm_constant_id_list_t *list); - -/** - * Initialize a list of constant ids with a given capacity. - * - * @param list The list to initialize. - * @param capacity The initial capacity of the list. - */ -void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity); - -/** - * Append a constant id to a list of constant ids. Returns false if any - * potential reallocations fail. - * - * @param list The list to append to. - * @param id The id to append. - * @return Whether the append succeeded. - */ -bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id); - -/** - * Insert a constant id into a list of constant ids at the specified index. - * - * @param list The list to insert into. - * @param index The index at which to insert. - * @param id The id to insert. - */ -void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id); - -/** - * Checks if the current constant id list includes the given constant id. - * - * @param list The list to check. - * @param id The id to check for. - * @return Whether the list includes the given id. - */ -bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id); - -/** - * Free the memory associated with a list of constant ids. - * - * @param list The list to free. - */ -void pm_constant_id_list_free(pm_constant_id_list_t *list); - -/** - * The type of bucket in the constant pool hash map. This determines how the - * bucket should be freed. - */ -typedef unsigned int pm_constant_pool_bucket_type_t; - -/** By default, each constant is a slice of the source. */ -static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0; - -/** An owned constant is one for which memory has been allocated. */ -static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1; - -/** A constant constant is known at compile time. */ -static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2; - -/** A bucket in the hash map. */ -typedef struct { - /** The incremental ID used for indexing back into the pool. */ - unsigned int id: 30; - - /** The type of the bucket, which determines how to free it. */ - pm_constant_pool_bucket_type_t type: 2; - - /** The hash of the bucket. */ - uint32_t hash; -} pm_constant_pool_bucket_t; - -/** A constant in the pool which effectively stores a string. */ -typedef struct { - /** A pointer to the start of the string. */ - const uint8_t *start; - - /** The length of the string. */ - size_t length; -} pm_constant_t; - -/** The overall constant pool, which stores constants found while parsing. */ -typedef struct { - /** The buckets in the hash map. */ - pm_constant_pool_bucket_t *buckets; - - /** The constants that are stored in the buckets. */ - pm_constant_t *constants; - - /** The number of buckets in the hash map. */ - uint32_t size; - - /** The number of buckets that have been allocated in the hash map. */ - uint32_t capacity; -} pm_constant_pool_t; - -/** - * Initialize a new constant pool with a given capacity. - * - * @param pool The pool to initialize. - * @param capacity The initial capacity of the pool. - * @return Whether the initialization succeeded. - */ -bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity); - -/** - * Return a pointer to the constant indicated by the given constant id. - * - * @param pool The pool to get the constant from. - * @param constant_id The id of the constant to get. - * @return A pointer to the constant. - */ -pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id); - -/** - * Find a constant in a constant pool. Returns the id of the constant, or 0 if - * the constant is not found. - * - * @param pool The pool to find the constant in. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length); - -/** - * Insert a constant into a constant pool that is a slice of a source string. - * Returns the id of the constant, or 0 if any potential calls to resize fail. - * - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length); - -/** - * Insert a constant into a constant pool from memory that is now owned by the - * constant pool. Returns the id of the constant, or 0 if any potential calls to - * resize fail. - * - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length); - -/** - * Insert a constant into a constant pool from memory that is constant. Returns - * the id of the constant, or 0 if any potential calls to resize fail. - * - * @param pool The pool to insert the constant into. - * @param start A pointer to the start of the constant. - * @param length The length of the constant. - * @return The id of the constant. - */ -pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length); - -/** - * Free the memory associated with a constant pool. - * - * @param pool The pool to free. - */ -void pm_constant_pool_free(pm_constant_pool_t *pool); - -#endif diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c deleted file mode 100644 index 4170ecc58d..0000000000 --- a/prism/util/pm_integer.c +++ /dev/null @@ -1,670 +0,0 @@ -#include "prism/util/pm_integer.h" - -/** - * Pull out the length and values from the integer, regardless of the form in - * which the length/values are stored. - */ -#define INTEGER_EXTRACT(integer, length_variable, values_variable) \ - if ((integer)->values == NULL) { \ - length_variable = 1; \ - values_variable = &(integer)->value; \ - } else { \ - length_variable = (integer)->length; \ - values_variable = (integer)->values; \ - } - -/** - * Adds two positive pm_integer_t with the given base. - * Return pm_integer_t with values allocated. Not normalized. - */ -static void -big_add(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) { - size_t left_length; - uint32_t *left_values; - INTEGER_EXTRACT(left, left_length, left_values) - - size_t right_length; - uint32_t *right_values; - INTEGER_EXTRACT(right, right_length, right_values) - - size_t length = left_length < right_length ? right_length : left_length; - uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * (length + 1)); - if (values == NULL) return; - - uint64_t carry = 0; - for (size_t index = 0; index < length; index++) { - uint64_t sum = carry + (index < left_length ? left_values[index] : 0) + (index < right_length ? right_values[index] : 0); - values[index] = (uint32_t) (sum % base); - carry = sum / base; - } - - if (carry > 0) { - values[length] = (uint32_t) carry; - length++; - } - - *destination = (pm_integer_t) { length, values, 0, false }; -} - -/** - * Internal use for karatsuba_multiply. Calculates `a - b - c` with the given - * base. Assume a, b, c, a - b - c all to be positive. - * Return pm_integer_t with values allocated. Not normalized. - */ -static void -big_sub2(pm_integer_t *destination, pm_integer_t *a, pm_integer_t *b, pm_integer_t *c, uint64_t base) { - size_t a_length; - uint32_t *a_values; - INTEGER_EXTRACT(a, a_length, a_values) - - size_t b_length; - uint32_t *b_values; - INTEGER_EXTRACT(b, b_length, b_values) - - size_t c_length; - uint32_t *c_values; - INTEGER_EXTRACT(c, c_length, c_values) - - uint32_t *values = (uint32_t*) xmalloc(sizeof(uint32_t) * a_length); - int64_t carry = 0; - - for (size_t index = 0; index < a_length; index++) { - int64_t sub = ( - carry + - a_values[index] - - (index < b_length ? b_values[index] : 0) - - (index < c_length ? c_values[index] : 0) - ); - - if (sub >= 0) { - values[index] = (uint32_t) sub; - carry = 0; - } else { - sub += 2 * (int64_t) base; - values[index] = (uint32_t) ((uint64_t) sub % base); - carry = sub / (int64_t) base - 2; - } - } - - while (a_length > 1 && values[a_length - 1] == 0) a_length--; - *destination = (pm_integer_t) { a_length, values, 0, false }; -} - -/** - * Multiply two positive integers with the given base using karatsuba algorithm. - * Return pm_integer_t with values allocated. Not normalized. - */ -static void -karatsuba_multiply(pm_integer_t *destination, pm_integer_t *left, pm_integer_t *right, uint64_t base) { - size_t left_length; - uint32_t *left_values; - INTEGER_EXTRACT(left, left_length, left_values) - - size_t right_length; - uint32_t *right_values; - INTEGER_EXTRACT(right, right_length, right_values) - - if (left_length > right_length) { - size_t temporary_length = left_length; - left_length = right_length; - right_length = temporary_length; - - uint32_t *temporary_values = left_values; - left_values = right_values; - right_values = temporary_values; - } - - if (left_length <= 10) { - size_t length = left_length + right_length; - uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t)); - if (values == NULL) return; - - for (size_t left_index = 0; left_index < left_length; left_index++) { - uint32_t carry = 0; - for (size_t right_index = 0; right_index < right_length; right_index++) { - uint64_t product = (uint64_t) left_values[left_index] * right_values[right_index] + values[left_index + right_index] + carry; - values[left_index + right_index] = (uint32_t) (product % base); - carry = (uint32_t) (product / base); - } - values[left_index + right_length] = carry; - } - - while (length > 1 && values[length - 1] == 0) length--; - *destination = (pm_integer_t) { length, values, 0, false }; - return; - } - - if (left_length * 2 <= right_length) { - uint32_t *values = (uint32_t *) xcalloc(left_length + right_length, sizeof(uint32_t)); - - for (size_t start_offset = 0; start_offset < right_length; start_offset += left_length) { - size_t end_offset = start_offset + left_length; - if (end_offset > right_length) end_offset = right_length; - - pm_integer_t sliced_left = { - .length = left_length, - .values = left_values, - .value = 0, - .negative = false - }; - - pm_integer_t sliced_right = { - .length = end_offset - start_offset, - .values = right_values + start_offset, - .value = 0, - .negative = false - }; - - pm_integer_t product; - karatsuba_multiply(&product, &sliced_left, &sliced_right, base); - - uint32_t carry = 0; - for (size_t index = 0; index < product.length; index++) { - uint64_t sum = (uint64_t) values[start_offset + index] + product.values[index] + carry; - values[start_offset + index] = (uint32_t) (sum % base); - carry = (uint32_t) (sum / base); - } - - if (carry > 0) values[start_offset + product.length] += carry; - pm_integer_free(&product); - } - - *destination = (pm_integer_t) { left_length + right_length, values, 0, false }; - return; - } - - size_t half = left_length / 2; - pm_integer_t x0 = { half, left_values, 0, false }; - pm_integer_t x1 = { left_length - half, left_values + half, 0, false }; - pm_integer_t y0 = { half, right_values, 0, false }; - pm_integer_t y1 = { right_length - half, right_values + half, 0, false }; - - pm_integer_t z0 = { 0 }; - karatsuba_multiply(&z0, &x0, &y0, base); - - pm_integer_t z2 = { 0 }; - karatsuba_multiply(&z2, &x1, &y1, base); - - // For simplicity to avoid considering negative values, - // use `z1 = (x0 + x1) * (y0 + y1) - z0 - z2` instead of original karatsuba algorithm. - pm_integer_t x01 = { 0 }; - big_add(&x01, &x0, &x1, base); - - pm_integer_t y01 = { 0 }; - big_add(&y01, &y0, &y1, base); - - pm_integer_t xy = { 0 }; - karatsuba_multiply(&xy, &x01, &y01, base); - - pm_integer_t z1; - big_sub2(&z1, &xy, &z0, &z2, base); - - size_t length = left_length + right_length; - uint32_t *values = (uint32_t*) xcalloc(length, sizeof(uint32_t)); - - assert(z0.values != NULL); - memcpy(values, z0.values, sizeof(uint32_t) * z0.length); - - assert(z2.values != NULL); - memcpy(values + 2 * half, z2.values, sizeof(uint32_t) * z2.length); - - uint32_t carry = 0; - for(size_t index = 0; index < z1.length; index++) { - uint64_t sum = (uint64_t) carry + values[index + half] + z1.values[index]; - values[index + half] = (uint32_t) (sum % base); - carry = (uint32_t) (sum / base); - } - - for(size_t index = half + z1.length; carry > 0; index++) { - uint64_t sum = (uint64_t) carry + values[index]; - values[index] = (uint32_t) (sum % base); - carry = (uint32_t) (sum / base); - } - - while (length > 1 && values[length - 1] == 0) length--; - pm_integer_free(&z0); - pm_integer_free(&z1); - pm_integer_free(&z2); - pm_integer_free(&x01); - pm_integer_free(&y01); - pm_integer_free(&xy); - - *destination = (pm_integer_t) { length, values, 0, false }; -} - -/** - * The values of a hexadecimal digit, where the index is the ASCII character. - * Note that there's an odd exception here where _ is mapped to 0. This is - * because it's possible for us to end up trying to parse a number that has - * already had an error attached to it, and we want to provide _something_ to - * the user. - */ -static const int8_t pm_integer_parse_digit_values[256] = { -// 0 1 2 3 4 5 6 7 8 9 A B C D E F - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 3x - -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, // 5x - -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Fx -}; - -/** - * Return the value of a hexadecimal digit in a uint8_t. - */ -static uint8_t -pm_integer_parse_digit(const uint8_t character) { - int8_t value = pm_integer_parse_digit_values[character]; - assert(value != -1 && "invalid digit"); - - return (uint8_t) value; -} - -/** - * Create a pm_integer_t from uint64_t with the given base. It is assumed that - * the memory for the pm_integer_t pointer has been zeroed. - */ -static void -pm_integer_from_uint64(pm_integer_t *integer, uint64_t value, uint64_t base) { - if (value < base) { - integer->value = (uint32_t) value; - return; - } - - size_t length = 0; - uint64_t length_value = value; - while (length_value > 0) { - length++; - length_value /= base; - } - - uint32_t *values = (uint32_t *) xmalloc(sizeof(uint32_t) * length); - if (values == NULL) return; - - for (size_t value_index = 0; value_index < length; value_index++) { - values[value_index] = (uint32_t) (value % base); - value /= base; - } - - integer->length = length; - integer->values = values; -} - -/** - * Normalize pm_integer_t. - * Heading zero values will be removed. If the integer fits into uint32_t, - * values is set to NULL, length is set to 0, and value field will be used. - */ -static void -pm_integer_normalize(pm_integer_t *integer) { - if (integer->values == NULL) { - return; - } - - while (integer->length > 1 && integer->values[integer->length - 1] == 0) { - integer->length--; - } - - if (integer->length > 1) { - return; - } - - uint32_t value = integer->values[0]; - bool negative = integer->negative && value != 0; - - pm_integer_free(integer); - *integer = (pm_integer_t) { .values = NULL, .value = value, .length = 0, .negative = negative }; -} - -/** - * Convert base of the integer. - * In practice, it converts 10**9 to 1<<32 or 1<<32 to 10**9. - */ -static void -pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, uint64_t base_from, uint64_t base_to) { - size_t source_length; - const uint32_t *source_values; - INTEGER_EXTRACT(source, source_length, source_values) - - size_t bigints_length = (source_length + 1) / 2; - assert(bigints_length > 0); - - pm_integer_t *bigints = (pm_integer_t *) xcalloc(bigints_length, sizeof(pm_integer_t)); - if (bigints == NULL) return; - - for (size_t index = 0; index < source_length; index += 2) { - uint64_t value = source_values[index] + base_from * (index + 1 < source_length ? source_values[index + 1] : 0); - pm_integer_from_uint64(&bigints[index / 2], value, base_to); - } - - pm_integer_t base = { 0 }; - pm_integer_from_uint64(&base, base_from, base_to); - - while (bigints_length > 1) { - pm_integer_t next_base; - karatsuba_multiply(&next_base, &base, &base, base_to); - - pm_integer_free(&base); - base = next_base; - - size_t next_length = (bigints_length + 1) / 2; - pm_integer_t *next_bigints = (pm_integer_t *) xcalloc(next_length, sizeof(pm_integer_t)); - - for (size_t bigints_index = 0; bigints_index < bigints_length; bigints_index += 2) { - if (bigints_index + 1 == bigints_length) { - next_bigints[bigints_index / 2] = bigints[bigints_index]; - } else { - pm_integer_t multiplied = { 0 }; - karatsuba_multiply(&multiplied, &base, &bigints[bigints_index + 1], base_to); - - big_add(&next_bigints[bigints_index / 2], &bigints[bigints_index], &multiplied, base_to); - pm_integer_free(&bigints[bigints_index]); - pm_integer_free(&bigints[bigints_index + 1]); - pm_integer_free(&multiplied); - } - } - - xfree(bigints); - bigints = next_bigints; - bigints_length = next_length; - } - - *destination = bigints[0]; - destination->negative = source->negative; - pm_integer_normalize(destination); - - xfree(bigints); - pm_integer_free(&base); -} - -#undef INTEGER_EXTRACT - -/** - * Convert digits to integer with the given power-of-two base. - */ -static void -pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *digits, size_t digits_length) { - size_t bit = 1; - while (base > (uint32_t) (1 << bit)) bit++; - - size_t length = (digits_length * bit + 31) / 32; - uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t)); - - for (size_t digit_index = 0; digit_index < digits_length; digit_index++) { - size_t bit_position = bit * (digits_length - digit_index - 1); - uint32_t value = digits[digit_index]; - - size_t index = bit_position / 32; - size_t shift = bit_position % 32; - - values[index] |= value << shift; - if (32 - shift < bit) values[index + 1] |= value >> (32 - shift); - } - - while (length > 1 && values[length - 1] == 0) length--; - *integer = (pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false }; - pm_integer_normalize(integer); -} - -/** - * Convert decimal digits to pm_integer_t. - */ -static void -pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) { - const size_t batch = 9; - size_t length = (digits_length + batch - 1) / batch; - - uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t)); - uint32_t value = 0; - - for (size_t digits_index = 0; digits_index < digits_length; digits_index++) { - value = value * 10 + digits[digits_index]; - - size_t reverse_index = digits_length - digits_index - 1; - if (reverse_index % batch == 0) { - values[reverse_index / batch] = value; - value = 0; - } - } - - // Convert base from 10**9 to 1<<32. - pm_integer_convert_base(integer, &((pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false }), 1000000000, ((uint64_t) 1 << 32)); - xfree(values); -} - -/** - * Parse a large integer from a string that does not fit into uint32_t. - */ -static void -pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) { - // Allocate an array to store digits. - uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start)); - size_t digits_length = 0; - - for (; start < end; start++) { - if (*start == '_') continue; - digits[digits_length++] = pm_integer_parse_digit(*start); - } - - // Construct pm_integer_t from the digits. - if (multiplier == 10) { - pm_integer_parse_decimal(integer, digits, digits_length); - } else { - pm_integer_parse_powof2(integer, multiplier, digits, digits_length); - } - - xfree(digits); -} - -/** - * Parse an integer from a string. This assumes that the format of the integer - * has already been validated, as internal validation checks are not performed - * here. - */ -void -pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end) { - // Ignore unary +. Unary - is parsed differently and will not end up here. - // Instead, it will modify the parsed integer later. - if (*start == '+') start++; - - // Determine the multiplier from the base, and skip past any prefixes. - uint32_t multiplier = 10; - switch (base) { - case PM_INTEGER_BASE_DEFAULT: - while (*start == '0') start++; // 01 -> 1 - break; - case PM_INTEGER_BASE_BINARY: - start += 2; // 0b - multiplier = 2; - break; - case PM_INTEGER_BASE_OCTAL: - start++; // 0 - if (*start == '_' || *start == 'o' || *start == 'O') start++; // o - multiplier = 8; - break; - case PM_INTEGER_BASE_DECIMAL: - if (*start == '0' && (end - start) > 1) start += 2; // 0d - break; - case PM_INTEGER_BASE_HEXADECIMAL: - start += 2; // 0x - multiplier = 16; - break; - case PM_INTEGER_BASE_UNKNOWN: - if (*start == '0' && (end - start) > 1) { - switch (start[1]) { - case '_': start += 2; multiplier = 8; break; - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': start++; multiplier = 8; break; - case 'b': case 'B': start += 2; multiplier = 2; break; - case 'o': case 'O': start += 2; multiplier = 8; break; - case 'd': case 'D': start += 2; break; - case 'x': case 'X': start += 2; multiplier = 16; break; - default: assert(false && "unreachable"); break; - } - } - break; - } - - // It's possible that we've consumed everything at this point if there is an - // invalid integer. If this is the case, we'll just return 0. - if (start >= end) return; - - const uint8_t *cursor = start; - uint64_t value = (uint64_t) pm_integer_parse_digit(*cursor++); - - for (; cursor < end; cursor++) { - if (*cursor == '_') continue; - value = value * multiplier + (uint64_t) pm_integer_parse_digit(*cursor); - - if (value > UINT32_MAX) { - // If the integer is too large to fit into a single uint32_t, then - // we'll parse it as a big integer. - pm_integer_parse_big(integer, multiplier, start, end); - return; - } - } - - integer->value = (uint32_t) value; -} - -/** - * Compare two integers. This function returns -1 if the left integer is less - * than the right integer, 0 if they are equal, and 1 if the left integer is - * greater than the right integer. - */ -int -pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) { - if (left->negative != right->negative) return left->negative ? -1 : 1; - int negative = left->negative ? -1 : 1; - - if (left->values == NULL && right->values == NULL) { - if (left->value < right->value) return -1 * negative; - if (left->value > right->value) return 1 * negative; - return 0; - } - - if (left->values == NULL || left->length < right->length) return -1 * negative; - if (right->values == NULL || left->length > right->length) return 1 * negative; - - for (size_t index = 0; index < left->length; index++) { - size_t value_index = left->length - index - 1; - uint32_t left_value = left->values[value_index]; - uint32_t right_value = right->values[value_index]; - - if (left_value < right_value) return -1 * negative; - if (left_value > right_value) return 1 * negative; - } - - return 0; -} - -/** - * Reduce a ratio of integers to its simplest form. - */ -void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) { - // If either the numerator or denominator do not fit into a 32-bit integer, - // then this function is a no-op. In the future, we may consider reducing - // even the larger numbers, but for now we're going to keep it simple. - if ( - // If the numerator doesn't fit into a 32-bit integer, return early. - numerator->length != 0 || - // If the denominator doesn't fit into a 32-bit integer, return early. - denominator->length != 0 || - // If the numerator is 0, then return early. - numerator->value == 0 || - // If the denominator is 1, then return early. - denominator->value == 1 - ) return; - - // Find the greatest common divisor of the numerator and denominator. - uint32_t divisor = numerator->value; - uint32_t remainder = denominator->value; - - while (remainder != 0) { - uint32_t temporary = remainder; - remainder = divisor % remainder; - divisor = temporary; - } - - // Divide the numerator and denominator by the greatest common divisor. - numerator->value /= divisor; - denominator->value /= divisor; -} - -/** - * Convert an integer to a decimal string. - */ -PRISM_EXPORTED_FUNCTION void -pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) { - if (integer->negative) { - pm_buffer_append_byte(buffer, '-'); - } - - // If the integer fits into a single uint32_t, then we can just append the - // value directly to the buffer. - if (integer->values == NULL) { - pm_buffer_append_format(buffer, "%" PRIu32, integer->value); - return; - } - - // If the integer is two uint32_t values, then we can | them together and - // append the result to the buffer. - if (integer->length == 2) { - const uint64_t value = ((uint64_t) integer->values[0]) | ((uint64_t) integer->values[1] << 32); - pm_buffer_append_format(buffer, "%" PRIu64, value); - return; - } - - // Otherwise, first we'll convert the base from 1<<32 to 10**9. - pm_integer_t converted = { 0 }; - pm_integer_convert_base(&converted, integer, (uint64_t) 1 << 32, 1000000000); - - if (converted.values == NULL) { - pm_buffer_append_format(buffer, "%" PRIu32, converted.value); - pm_integer_free(&converted); - return; - } - - // Allocate a buffer that we'll copy the decimal digits into. - size_t digits_length = converted.length * 9; - char *digits = xcalloc(digits_length, sizeof(char)); - if (digits == NULL) return; - - // Pack bigdecimal to digits. - for (size_t value_index = 0; value_index < converted.length; value_index++) { - uint32_t value = converted.values[value_index]; - - for (size_t digit_index = 0; digit_index < 9; digit_index++) { - digits[digits_length - 9 * value_index - digit_index - 1] = (char) ('0' + value % 10); - value /= 10; - } - } - - size_t start_offset = 0; - while (start_offset < digits_length - 1 && digits[start_offset] == '0') start_offset++; - - // Finally, append the string to the buffer and free the digits. - pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset); - xfree(digits); - pm_integer_free(&converted); -} - -/** - * Free the internal memory of an integer. This memory will only be allocated if - * the integer exceeds the size of a single uint32_t. - */ -PRISM_EXPORTED_FUNCTION void -pm_integer_free(pm_integer_t *integer) { - if (integer->values) { - xfree(integer->values); - } -} diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h deleted file mode 100644 index a9e2966703..0000000000 --- a/prism/util/pm_integer.h +++ /dev/null @@ -1,126 +0,0 @@ -/** - * @file pm_integer.h - * - * This module provides functions for working with arbitrary-sized integers. - */ -#ifndef PRISM_NUMBER_H -#define PRISM_NUMBER_H - -#include "prism/defines.h" -#include "prism/util/pm_buffer.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdlib.h> - -/** - * A structure represents an arbitrary-sized integer. - */ -typedef struct { - /** - * The number of allocated values. length is set to 0 if the integer fits - * into uint32_t. - */ - size_t length; - - /** - * List of 32-bit integers. Set to NULL if the integer fits into uint32_t. - */ - uint32_t *values; - - /** - * Embedded value for small integer. This value is set to 0 if the value - * does not fit into uint32_t. - */ - uint32_t value; - - /** - * Whether or not the integer is negative. It is stored this way so that a - * zeroed pm_integer_t is always positive zero. - */ - bool negative; -} pm_integer_t; - -/** - * An enum controlling the base of an integer. It is expected that the base is - * already known before parsing the integer, even though it could be derived - * from the string itself. - */ -typedef enum { - /** The default decimal base, with no prefix. Leading 0s will be ignored. */ - PM_INTEGER_BASE_DEFAULT, - - /** The binary base, indicated by a 0b or 0B prefix. */ - PM_INTEGER_BASE_BINARY, - - /** The octal base, indicated by a 0, 0o, or 0O prefix. */ - PM_INTEGER_BASE_OCTAL, - - /** The decimal base, indicated by a 0d, 0D, or empty prefix. */ - PM_INTEGER_BASE_DECIMAL, - - /** The hexadecimal base, indicated by a 0x or 0X prefix. */ - PM_INTEGER_BASE_HEXADECIMAL, - - /** - * An unknown base, in which case pm_integer_parse will derive it based on - * the content of the string. This is less efficient and does more - * comparisons, so if callers know the base ahead of time, they should use - * that instead. - */ - PM_INTEGER_BASE_UNKNOWN -} pm_integer_base_t; - -/** - * Parse an integer from a string. This assumes that the format of the integer - * has already been validated, as internal validation checks are not performed - * here. - * - * @param integer The integer to parse into. - * @param base The base of the integer. - * @param start The start of the string. - * @param end The end of the string. - */ -void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end); - -/** - * Compare two integers. This function returns -1 if the left integer is less - * than the right integer, 0 if they are equal, and 1 if the left integer is - * greater than the right integer. - * - * @param left The left integer to compare. - * @param right The right integer to compare. - * @return The result of the comparison. - */ -int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right); - -/** - * Reduce a ratio of integers to its simplest form. - * - * If either the numerator or denominator do not fit into a 32-bit integer, then - * this function is a no-op. In the future, we may consider reducing even the - * larger numbers, but for now we're going to keep it simple. - * - * @param numerator The numerator of the ratio. - * @param denominator The denominator of the ratio. - */ -void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator); - -/** - * Convert an integer to a decimal string. - * - * @param buffer The buffer to append the string to. - * @param integer The integer to convert to a string. - */ -PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer); - -/** - * Free the internal memory of an integer. This memory will only be allocated if - * the integer exceeds the size of a single node in the linked list. - * - * @param integer The integer to free. - */ -PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer); - -#endif diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c deleted file mode 100644 index ad2294cd60..0000000000 --- a/prism/util/pm_list.c +++ /dev/null @@ -1,49 +0,0 @@ -#include "prism/util/pm_list.h" - -/** - * Returns true if the given list is empty. - */ -PRISM_EXPORTED_FUNCTION bool -pm_list_empty_p(pm_list_t *list) { - return list->head == NULL; -} - -/** - * Returns the size of the list. - */ -PRISM_EXPORTED_FUNCTION size_t -pm_list_size(pm_list_t *list) { - return list->size; -} - -/** - * Append a node to the given list. - */ -void -pm_list_append(pm_list_t *list, pm_list_node_t *node) { - if (list->head == NULL) { - list->head = node; - } else { - list->tail->next = node; - } - - list->tail = node; - list->size++; -} - -/** - * Deallocate the internal state of the given list. - */ -PRISM_EXPORTED_FUNCTION void -pm_list_free(pm_list_t *list) { - pm_list_node_t *node = list->head; - pm_list_node_t *next; - - while (node != NULL) { - next = node->next; - xfree(node); - node = next; - } - - list->size = 0; -} diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h deleted file mode 100644 index 3512dee979..0000000000 --- a/prism/util/pm_list.h +++ /dev/null @@ -1,97 +0,0 @@ -/** - * @file pm_list.h - * - * An abstract linked list. - */ -#ifndef PRISM_LIST_H -#define PRISM_LIST_H - -#include "prism/defines.h" - -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> -#include <stdlib.h> - -/** - * This struct represents an abstract linked list that provides common - * functionality. It is meant to be used any time a linked list is necessary to - * store data. - * - * The linked list itself operates off a set of pointers. Because the pointers - * are not necessarily sequential, they can be of any size. We use this fact to - * allow the consumer of this linked list to extend the node struct to include - * any data they want. This is done by using the pm_list_node_t as the first - * member of the struct. - * - * For example, if we want to store a list of integers, we can do the following: - * - * ```c - * typedef struct { - * pm_list_node_t node; - * int value; - * } pm_int_node_t; - * - * pm_list_t list = { 0 }; - * pm_int_node_t *node = xmalloc(sizeof(pm_int_node_t)); - * node->value = 5; - * - * pm_list_append(&list, &node->node); - * ``` - * - * The pm_list_t struct is used to represent the overall linked list. It - * contains a pointer to the head and tail of the list. This allows for easy - * iteration and appending of new nodes. - */ -typedef struct pm_list_node { - /** A pointer to the next node in the list. */ - struct pm_list_node *next; -} pm_list_node_t; - -/** - * This represents the overall linked list. It keeps a pointer to the head and - * tail so that iteration is easy and pushing new nodes is easy. - */ -typedef struct { - /** The size of the list. */ - size_t size; - - /** A pointer to the head of the list. */ - pm_list_node_t *head; - - /** A pointer to the tail of the list. */ - pm_list_node_t *tail; -} pm_list_t; - -/** - * Returns true if the given list is empty. - * - * @param list The list to check. - * @return True if the given list is empty, otherwise false. - */ -PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list); - -/** - * Returns the size of the list. - * - * @param list The list to check. - * @return The size of the list. - */ -PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list); - -/** - * Append a node to the given list. - * - * @param list The list to append to. - * @param node The node to append. - */ -void pm_list_append(pm_list_t *list, pm_list_node_t *node); - -/** - * Deallocate the internal state of the given list. - * - * @param list The list to free. - */ -PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list); - -#endif diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c deleted file mode 100644 index 7ea20ace6d..0000000000 --- a/prism/util/pm_memchr.c +++ /dev/null @@ -1,35 +0,0 @@ -#include "prism/util/pm_memchr.h" - -#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40 - -/** - * We need to roll our own memchr to handle cases where the encoding changes and - * we need to search for a character in a buffer that could be the trailing byte - * of a multibyte character. - */ -void * -pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) { - if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) { - const uint8_t *source = (const uint8_t *) memory; - size_t index = 0; - - while (index < number) { - if (source[index] == character) { - return (void *) (source + index); - } - - size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index)); - if (width == 0) { - return NULL; - } - - index += width; - } - - return NULL; - } else { - return memchr(memory, character, number); - } -} - -#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h deleted file mode 100644 index e0671eaed3..0000000000 --- a/prism/util/pm_memchr.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * @file pm_memchr.h - * - * A custom memchr implementation. - */ -#ifndef PRISM_MEMCHR_H -#define PRISM_MEMCHR_H - -#include "prism/defines.h" -#include "prism/encoding.h" - -#include <stddef.h> - -/** - * We need to roll our own memchr to handle cases where the encoding changes and - * we need to search for a character in a buffer that could be the trailing byte - * of a multibyte character. - * - * @param source The source string. - * @param character The character to search for. - * @param number The maximum number of bytes to search. - * @param encoding_changed Whether the encoding changed. - * @param encoding A pointer to the encoding. - * @return A pointer to the first occurrence of the character in the source - * string, or NULL if no such character exists. - */ -void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding); - -#endif diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c deleted file mode 100644 index 8331618f54..0000000000 --- a/prism/util/pm_newline_list.c +++ /dev/null @@ -1,125 +0,0 @@ -#include "prism/util/pm_newline_list.h" - -/** - * Initialize a new newline list with the given capacity. Returns true if the - * allocation of the offsets succeeds, otherwise returns false. - */ -bool -pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) { - list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t)); - if (list->offsets == NULL) return false; - - list->start = start; - - // This is 1 instead of 0 because we want to include the first line of the - // file as having offset 0, which is set because of calloc. - list->size = 1; - list->capacity = capacity; - - return true; -} - -/** - * Clear out the newlines that have been appended to the list. - */ -void -pm_newline_list_clear(pm_newline_list_t *list) { - list->size = 1; -} - -/** - * Append a new offset to the newline list. Returns true if the reallocation of - * the offsets succeeds (if one was necessary), otherwise returns false. - */ -bool -pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { - if (list->size == list->capacity) { - size_t *original_offsets = list->offsets; - - list->capacity = (list->capacity * 3) / 2; - list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t)); - if (list->offsets == NULL) return false; - - memcpy(list->offsets, original_offsets, list->size * sizeof(size_t)); - xfree(original_offsets); - } - - assert(*cursor == '\n'); - assert(cursor >= list->start); - size_t newline_offset = (size_t) (cursor - list->start + 1); - - assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]); - list->offsets[list->size++] = newline_offset; - - return true; -} - -/** - * Returns the line of the given offset. If the offset is not in the list, the - * line of the closest offset less than the given offset is returned. - */ -int32_t -pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) { - assert(cursor >= list->start); - size_t offset = (size_t) (cursor - list->start); - - size_t left = 0; - size_t right = list->size - 1; - - while (left <= right) { - size_t mid = left + (right - left) / 2; - - if (list->offsets[mid] == offset) { - return ((int32_t) mid) + start_line; - } - - if (list->offsets[mid] < offset) { - left = mid + 1; - } else { - right = mid - 1; - } - } - - return ((int32_t) left) + start_line - 1; -} - -/** - * Returns the line and column of the given offset. If the offset is not in the - * list, the line and column of the closest offset less than the given offset - * are returned. - */ -pm_line_column_t -pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) { - assert(cursor >= list->start); - size_t offset = (size_t) (cursor - list->start); - - size_t left = 0; - size_t right = list->size - 1; - - while (left <= right) { - size_t mid = left + (right - left) / 2; - - if (list->offsets[mid] == offset) { - return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 }); - } - - if (list->offsets[mid] < offset) { - left = mid + 1; - } else { - right = mid - 1; - } - } - - return ((pm_line_column_t) { - .line = ((int32_t) left) + start_line - 1, - .column = (uint32_t) (offset - list->offsets[left - 1]) - }); -} - -/** - * Free the internal memory allocated for the newline list. - */ -void -pm_newline_list_free(pm_newline_list_t *list) { - xfree(list->offsets); -} diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h deleted file mode 100644 index 406abe8ba5..0000000000 --- a/prism/util/pm_newline_list.h +++ /dev/null @@ -1,113 +0,0 @@ -/** - * @file pm_newline_list.h - * - * A list of byte offsets of newlines in a string. - * - * When compiling the syntax tree, it's necessary to know the line and column - * of many nodes. This is necessary to support things like error messages, - * tracepoints, etc. - * - * It's possible that we could store the start line, start column, end line, and - * end column on every node in addition to the offsets that we already store, - * but that would be quite a lot of memory overhead. - */ -#ifndef PRISM_NEWLINE_LIST_H -#define PRISM_NEWLINE_LIST_H - -#include "prism/defines.h" - -#include <assert.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdlib.h> - -/** - * A list of offsets of newlines in a string. The offsets are assumed to be - * sorted/inserted in ascending order. - */ -typedef struct { - /** A pointer to the start of the source string. */ - const uint8_t *start; - - /** The number of offsets in the list. */ - size_t size; - - /** The capacity of the list that has been allocated. */ - size_t capacity; - - /** The list of offsets. */ - size_t *offsets; -} pm_newline_list_t; - -/** - * A line and column in a string. - */ -typedef struct { - /** The line number. */ - int32_t line; - - /** The column number. */ - uint32_t column; -} pm_line_column_t; - -/** - * Initialize a new newline list with the given capacity. Returns true if the - * allocation of the offsets succeeds, otherwise returns false. - * - * @param list The list to initialize. - * @param start A pointer to the start of the source string. - * @param capacity The initial capacity of the list. - * @return True if the allocation of the offsets succeeds, otherwise false. - */ -bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); - -/** - * Clear out the newlines that have been appended to the list. - * - * @param list The list to clear. - */ -void -pm_newline_list_clear(pm_newline_list_t *list); - -/** - * Append a new offset to the newline list. Returns true if the reallocation of - * the offsets succeeds (if one was necessary), otherwise returns false. - * - * @param list The list to append to. - * @param cursor A pointer to the offset to append. - * @return True if the reallocation of the offsets succeeds (if one was - * necessary), otherwise false. - */ -bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor); - -/** - * Returns the line of the given offset. If the offset is not in the list, the - * line of the closest offset less than the given offset is returned. - * - * @param list The list to search. - * @param cursor A pointer to the offset to search for. - * @param start_line The line to start counting from. - * @return The line of the given offset. - */ -int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line); - -/** - * Returns the line and column of the given offset. If the offset is not in the - * list, the line and column of the closest offset less than the given offset - * are returned. - * - * @param list The list to search. - * @param cursor A pointer to the offset to search for. - * @param start_line The line to start counting from. - * @return The line and column of the given offset. - */ -pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line); - -/** - * Free the internal memory allocated for the newline list. - * - * @param list The list to free. - */ -void pm_newline_list_free(pm_newline_list_t *list); - -#endif diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c deleted file mode 100644 index 75422fbdf2..0000000000 --- a/prism/util/pm_string.c +++ /dev/null @@ -1,383 +0,0 @@ -#include "prism/util/pm_string.h" - -/** - * Returns the size of the pm_string_t struct. This is necessary to allocate the - * correct amount of memory in the FFI backend. - */ -PRISM_EXPORTED_FUNCTION size_t -pm_string_sizeof(void) { - return sizeof(pm_string_t); -} - -/** - * Initialize a shared string that is based on initial input. - */ -void -pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) { - assert(start <= end); - - *string = (pm_string_t) { - .type = PM_STRING_SHARED, - .source = start, - .length = (size_t) (end - start) - }; -} - -/** - * Initialize an owned string that is responsible for freeing allocated memory. - */ -void -pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) { - *string = (pm_string_t) { - .type = PM_STRING_OWNED, - .source = source, - .length = length - }; -} - -/** - * Initialize a constant string that doesn't own its memory source. - */ -void -pm_string_constant_init(pm_string_t *string, const char *source, size_t length) { - *string = (pm_string_t) { - .type = PM_STRING_CONSTANT, - .source = (const uint8_t *) source, - .length = length - }; -} - -#ifdef _WIN32 -/** - * Represents a file handle on Windows, where the path will need to be freed - * when the file is closed. - */ -typedef struct { - /** The path to the file, which will become allocated memory. */ - WCHAR *path; - - /** The handle to the file, which will start as uninitialized memory. */ - HANDLE file; -} pm_string_file_handle_t; - -/** - * Open the file indicated by the filepath parameter for reading on Windows. - * Perform any kind of normalization that needs to happen on the filepath. - */ -static pm_string_init_result_t -pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) { - int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0); - if (length == 0) return PM_STRING_INIT_ERROR_GENERIC; - - handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length)); - if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) { - xfree(handle->path); - return PM_STRING_INIT_ERROR_GENERIC; - } - - handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); - if (handle->file == INVALID_HANDLE_VALUE) { - pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC; - - if (GetLastError() == ERROR_ACCESS_DENIED) { - DWORD attributes = GetFileAttributesW(handle->path); - if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - result = PM_STRING_INIT_ERROR_DIRECTORY; - } - } - - xfree(handle->path); - return result; - } - - return PM_STRING_INIT_SUCCESS; -} - -/** - * Close the file handle and free the path. - */ -static void -pm_string_file_handle_close(pm_string_file_handle_t *handle) { - xfree(handle->path); - CloseHandle(handle->file); -} -#endif - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. - * - * We want to use demand paging as much as possible in order to avoid having to - * read the entire file into memory (which could be detrimental to performance - * for large files). This means that if we're on windows we'll use - * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use - * `mmap`, and on other POSIX systems we'll use `read`. - */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t -pm_string_mapped_init(pm_string_t *string, const char *filepath) { -#ifdef _WIN32 - // Open the file for reading. - pm_string_file_handle_t handle; - pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath); - if (result != PM_STRING_INIT_SUCCESS) return result; - - // Get the file size. - DWORD file_size = GetFileSize(handle.file, NULL); - if (file_size == INVALID_FILE_SIZE) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. - if (file_size == 0) { - pm_string_file_handle_close(&handle); - const uint8_t source[] = ""; - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - // Create a mapping of the file. - HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL); - if (mapping == NULL) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Map the file into memory. - uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); - CloseHandle(mapping); - pm_string_file_handle_close(&handle); - - if (source == NULL) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size }; - return PM_STRING_INIT_SUCCESS; -#elif defined(_POSIX_MAPPED_FILES) - // Open the file for reading - int fd = open(filepath, O_RDONLY); - if (fd == -1) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Ensure it is a file and not a directory - if (S_ISDIR(sb.st_mode)) { - close(fd); - return PM_STRING_INIT_ERROR_DIRECTORY; - } - - // mmap the file descriptor to virtually get the contents - size_t size = (size_t) sb.st_size; - uint8_t *source = NULL; - - if (size == 0) { - close(fd); - const uint8_t source[] = ""; - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - if (source == MAP_FAILED) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - close(fd); - *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size }; - return PM_STRING_INIT_SUCCESS; -#else - return pm_string_file_init(string, filepath); -#endif -} - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. - */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t -pm_string_file_init(pm_string_t *string, const char *filepath) { -#ifdef _WIN32 - // Open the file for reading. - pm_string_file_handle_t handle; - pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath); - if (result != PM_STRING_INIT_SUCCESS) return result; - - // Get the file size. - DWORD file_size = GetFileSize(handle.file, NULL); - if (file_size == INVALID_FILE_SIZE) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. - if (file_size == 0) { - pm_string_file_handle_close(&handle); - const uint8_t source[] = ""; - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - // Create a buffer to read the file into. - uint8_t *source = xmalloc(file_size); - if (source == NULL) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Read the contents of the file - DWORD bytes_read; - if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) { - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Check the number of bytes read - if (bytes_read != file_size) { - xfree(source); - pm_string_file_handle_close(&handle); - return PM_STRING_INIT_ERROR_GENERIC; - } - - pm_string_file_handle_close(&handle); - *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size }; - return PM_STRING_INIT_SUCCESS; -#elif defined(PRISM_HAS_FILESYSTEM) - // Open the file for reading - int fd = open(filepath, O_RDONLY); - if (fd == -1) { - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - // Ensure it is a file and not a directory - if (S_ISDIR(sb.st_mode)) { - close(fd); - return PM_STRING_INIT_ERROR_DIRECTORY; - } - - // Check the size to see if it's empty - size_t size = (size_t) sb.st_size; - if (size == 0) { - close(fd); - const uint8_t source[] = ""; - *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 }; - return PM_STRING_INIT_SUCCESS; - } - - size_t length = (size_t) size; - uint8_t *source = xmalloc(length); - if (source == NULL) { - close(fd); - return PM_STRING_INIT_ERROR_GENERIC; - } - - long bytes_read = (long) read(fd, source, length); - close(fd); - - if (bytes_read == -1) { - xfree(source); - return PM_STRING_INIT_ERROR_GENERIC; - } - - *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length }; - return PM_STRING_INIT_SUCCESS; -#else - (void) string; - (void) filepath; - perror("pm_string_file_init is not implemented for this platform"); - return PM_STRING_INIT_ERROR_GENERIC; -#endif -} - -/** - * Ensure the string is owned. If it is not, then reinitialize it as owned and - * copy over the previous source. - */ -void -pm_string_ensure_owned(pm_string_t *string) { - if (string->type == PM_STRING_OWNED) return; - - size_t length = pm_string_length(string); - const uint8_t *source = pm_string_source(string); - - uint8_t *memory = xmalloc(length); - if (!memory) return; - - pm_string_owned_init(string, memory, length); - memcpy((void *) string->source, source, length); -} - -/** - * Compare the underlying lengths and bytes of two strings. Returns 0 if the - * strings are equal, a negative number if the left string is less than the - * right string, and a positive number if the left string is greater than the - * right string. - */ -int -pm_string_compare(const pm_string_t *left, const pm_string_t *right) { - size_t left_length = pm_string_length(left); - size_t right_length = pm_string_length(right); - - if (left_length < right_length) { - return -1; - } else if (left_length > right_length) { - return 1; - } - - return memcmp(pm_string_source(left), pm_string_source(right), left_length); -} - -/** - * Returns the length associated with the string. - */ -PRISM_EXPORTED_FUNCTION size_t -pm_string_length(const pm_string_t *string) { - return string->length; -} - -/** - * Returns the start pointer associated with the string. - */ -PRISM_EXPORTED_FUNCTION const uint8_t * -pm_string_source(const pm_string_t *string) { - return string->source; -} - -/** - * Free the associated memory of the given string. - */ -PRISM_EXPORTED_FUNCTION void -pm_string_free(pm_string_t *string) { - void *memory = (void *) string->source; - - if (string->type == PM_STRING_OWNED) { - xfree(memory); -#ifdef PRISM_HAS_MMAP - } else if (string->type == PM_STRING_MAPPED && string->length) { -#if defined(_WIN32) - UnmapViewOfFile(memory); -#elif defined(_POSIX_MAPPED_FILES) - munmap(memory, string->length); -#endif -#endif /* PRISM_HAS_MMAP */ - } -} diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h deleted file mode 100644 index f99f1abdf3..0000000000 --- a/prism/util/pm_string.h +++ /dev/null @@ -1,190 +0,0 @@ -/** - * @file pm_string.h - * - * A generic string type that can have various ownership semantics. - */ -#ifndef PRISM_STRING_H -#define PRISM_STRING_H - -#include "prism/defines.h" - -#include <assert.h> -#include <errno.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> - -// The following headers are necessary to read files using demand paging. -#ifdef _WIN32 -#include <windows.h> -#elif defined(_POSIX_MAPPED_FILES) -#include <fcntl.h> -#include <sys/mman.h> -#include <sys/stat.h> -#elif defined(PRISM_HAS_FILESYSTEM) -#include <fcntl.h> -#include <sys/stat.h> -#endif - -/** - * A generic string type that can have various ownership semantics. - */ -typedef struct { - /** A pointer to the start of the string. */ - const uint8_t *source; - - /** The length of the string in bytes of memory. */ - size_t length; - - /** The type of the string. This field determines how the string should be freed. */ - enum { - /** This string is a constant string, and should not be freed. */ - PM_STRING_CONSTANT, - - /** This is a slice of another string, and should not be freed. */ - PM_STRING_SHARED, - - /** This string owns its memory, and should be freed using `pm_string_free`. */ - PM_STRING_OWNED, - -#ifdef PRISM_HAS_MMAP - /** This string is a memory-mapped file, and should be freed using `pm_string_free`. */ - PM_STRING_MAPPED -#endif - } type; -} pm_string_t; - -/** - * Returns the size of the pm_string_t struct. This is necessary to allocate the - * correct amount of memory in the FFI backend. - * - * @return The size of the pm_string_t struct. - */ -PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void); - -/** - * Defines an empty string. This is useful for initializing a string that will - * be filled in later. - */ -#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 }) - -/** - * Initialize a shared string that is based on initial input. - * - * @param string The string to initialize. - * @param start The start of the string. - * @param end The end of the string. - */ -void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end); - -/** - * Initialize an owned string that is responsible for freeing allocated memory. - * - * @param string The string to initialize. - * @param source The source of the string. - * @param length The length of the string. - */ -void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length); - -/** - * Initialize a constant string that doesn't own its memory source. - * - * @param string The string to initialize. - * @param source The source of the string. - * @param length The length of the string. - */ -void pm_string_constant_init(pm_string_t *string, const char *source, size_t length); - -/** - * Represents the result of calling pm_string_mapped_init or - * pm_string_file_init. We need this additional information because there is - * not a platform-agnostic way to indicate that the file that was attempted to - * be opened was a directory. - */ -typedef enum { - /** Indicates that the string was successfully initialized. */ - PM_STRING_INIT_SUCCESS = 0, - /** - * Indicates a generic error from a string_*_init function, where the type - * of error should be read from `errno` or `GetLastError()`. - */ - PM_STRING_INIT_ERROR_GENERIC = 1, - /** - * Indicates that the file that was attempted to be opened was a directory. - */ - PM_STRING_INIT_ERROR_DIRECTORY = 2 -} pm_string_init_result_t; - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. - * - * We want to use demand paging as much as possible in order to avoid having to - * read the entire file into memory (which could be detrimental to performance - * for large files). This means that if we're on windows we'll use - * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use - * `mmap`, and on other POSIX systems we'll use `read`. - * - * @param string The string to initialize. - * @param filepath The filepath to read. - * @return The success of the read, indicated by the value of the enum. - */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath); - -/** - * Read the file indicated by the filepath parameter into source and load its - * contents and size into the given `pm_string_t`. The given `pm_string_t` - * should be freed using `pm_string_free` when it is no longer used. - * - * @param string The string to initialize. - * @param filepath The filepath to read. - * @return The success of the read, indicated by the value of the enum. - */ -PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath); - -/** - * Ensure the string is owned. If it is not, then reinitialize it as owned and - * copy over the previous source. - * - * @param string The string to ensure is owned. - */ -void pm_string_ensure_owned(pm_string_t *string); - -/** - * Compare the underlying lengths and bytes of two strings. Returns 0 if the - * strings are equal, a negative number if the left string is less than the - * right string, and a positive number if the left string is greater than the - * right string. - * - * @param left The left string to compare. - * @param right The right string to compare. - * @return The comparison result. - */ -int pm_string_compare(const pm_string_t *left, const pm_string_t *right); - -/** - * Returns the length associated with the string. - * - * @param string The string to get the length of. - * @return The length of the string. - */ -PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string); - -/** - * Returns the start pointer associated with the string. - * - * @param string The string to get the start pointer of. - * @return The start pointer of the string. - */ -PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string); - -/** - * Free the associated memory of the given string. - * - * @param string The string to free. - */ -PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string); - -#endif diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c deleted file mode 100644 index 3f58421554..0000000000 --- a/prism/util/pm_strncasecmp.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "prism/util/pm_strncasecmp.h" - -/** - * A locale-insensitive version of `tolower(3)` - */ -static inline int -pm_tolower(int c) -{ - if ('A' <= c && c <= 'Z') { - return c | 0x20; - } - return c; -} - -/** - * Compare two strings, ignoring case, up to the given length. Returns 0 if the - * strings are equal, a negative number if string1 is less than string2, or a - * positive number if string1 is greater than string2. - * - * Note that this is effectively our own implementation of strncasecmp, but it's - * not available on all of the platforms we want to support so we're rolling it - * here. - */ -int -pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) { - size_t offset = 0; - int difference = 0; - - while (offset < length && string1[offset] != '\0') { - if (string2[offset] == '\0') return string1[offset]; - if ((difference = pm_tolower(string1[offset]) - pm_tolower(string2[offset])) != 0) return difference; - offset++; - } - - return difference; -} diff --git a/prism/util/pm_strncasecmp.h b/prism/util/pm_strncasecmp.h deleted file mode 100644 index 5cb88cb5eb..0000000000 --- a/prism/util/pm_strncasecmp.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * @file pm_strncasecmp.h - * - * A custom strncasecmp implementation. - */ -#ifndef PRISM_STRNCASECMP_H -#define PRISM_STRNCASECMP_H - -#include "prism/defines.h" - -#include <ctype.h> -#include <stddef.h> -#include <stdint.h> - -/** - * Compare two strings, ignoring case, up to the given length. Returns 0 if the - * strings are equal, a negative number if string1 is less than string2, or a - * positive number if string1 is greater than string2. - * - * Note that this is effectively our own implementation of strncasecmp, but it's - * not available on all of the platforms we want to support so we're rolling it - * here. - * - * @param string1 The first string to compare. - * @param string2 The second string to compare - * @param length The maximum number of characters to compare. - * @return 0 if the strings are equal, a negative number if string1 is less than - * string2, or a positive number if string1 is greater than string2. - */ -int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length); - -#endif diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c deleted file mode 100644 index 916a4cc3fd..0000000000 --- a/prism/util/pm_strpbrk.c +++ /dev/null @@ -1,206 +0,0 @@ -#include "prism/util/pm_strpbrk.h" - -/** - * Add an invalid multibyte character error to the parser. - */ -static inline void -pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start); -} - -/** - * Set the explicit encoding for the parser to the current encoding. - */ -static inline void -pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) { - if (parser->explicit_encoding != NULL) { - if (parser->explicit_encoding == parser->encoding) { - // Okay, we already locked to this encoding. - } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { - // Not okay, we already found a Unicode escape sequence and this - // conflicts. - pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name); - } else { - // Should not be anything else. - assert(false && "unreachable"); - } - } - - parser->explicit_encoding = parser->encoding; -} - -/** - * This is the default path. - */ -static inline const uint8_t * -pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; - - while (index < maximum) { - if (strchr((const char *) charset, source[index]) != NULL) { - return source + index; - } - - if (source[index] < 0x80) { - index++; - } else { - size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)); - - if (width > 0) { - index += width; - } else if (!validate) { - index++; - } else { - // At this point we know we have an invalid multibyte character. - // We'll walk forward as far as we can until we find the next - // valid character so that we don't spam the user with a ton of - // the same kind of error. - const size_t start = index; - - do { - index++; - } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); - } - } - } - - return NULL; -} - -/** - * This is the path when the encoding is ASCII-8BIT. - */ -static inline const uint8_t * -pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; - - while (index < maximum) { - if (strchr((const char *) charset, source[index]) != NULL) { - return source + index; - } - - if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1); - index++; - } - - return NULL; -} - -/** - * This is the slow path that does care about the encoding. - */ -static inline const uint8_t * -pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; - const pm_encoding_t *encoding = parser->encoding; - - while (index < maximum) { - if (strchr((const char *) charset, source[index]) != NULL) { - return source + index; - } - - if (source[index] < 0x80) { - index++; - } else { - size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index)); - if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width); - - if (width > 0) { - index += width; - } else if (!validate) { - index++; - } else { - // At this point we know we have an invalid multibyte character. - // We'll walk forward as far as we can until we find the next - // valid character so that we don't spam the user with a ton of - // the same kind of error. - const size_t start = index; - - do { - index++; - } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); - } - } - } - - return NULL; -} - -/** - * This is the fast path that does not care about the encoding because we know - * the encoding only supports single-byte characters. - */ -static inline const uint8_t * -pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) { - size_t index = 0; - const pm_encoding_t *encoding = parser->encoding; - - while (index < maximum) { - if (strchr((const char *) charset, source[index]) != NULL) { - return source + index; - } - - if (source[index] < 0x80 || !validate) { - index++; - } else { - size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index)); - pm_strpbrk_explicit_encoding_set(parser, source, width); - - if (width > 0) { - index += width; - } else { - // At this point we know we have an invalid multibyte character. - // We'll walk forward as far as we can until we find the next - // valid character so that we don't spam the user with a ton of - // the same kind of error. - const size_t start = index; - - do { - index++; - } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0); - - pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index); - } - } - } - - return NULL; -} - -/** - * Here we have rolled our own version of strpbrk. The standard library strpbrk - * has undefined behavior when the source string is not null-terminated. We want - * to support strings that are not null-terminated because pm_parse does not - * have the contract that the string is null-terminated. (This is desirable - * because it means the extension can call pm_parse with the result of a call to - * mmap). - * - * The standard library strpbrk also does not support passing a maximum length - * to search. We want to support this for the reason mentioned above, but we - * also don't want it to stop on null bytes. Ruby actually allows null bytes - * within strings, comments, regular expressions, etc. So we need to be able to - * skip past them. - * - * Finally, we want to support encodings wherein the charset could contain - * characters that are trailing bytes of multi-byte characters. For example, in - * Shift_JIS, the backslash character can be a trailing byte. In that case we - * need to take a slower path and iterate one multi-byte character at a time. - */ -const uint8_t * -pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) { - if (length <= 0) { - return NULL; - } else if (!parser->encoding_changed) { - return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate); - } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) { - return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate); - } else if (parser->encoding->multibyte) { - return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate); - } else { - return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate); - } -} diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h deleted file mode 100644 index f387bd5782..0000000000 --- a/prism/util/pm_strpbrk.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - * @file pm_strpbrk.h - * - * A custom strpbrk implementation. - */ -#ifndef PRISM_STRPBRK_H -#define PRISM_STRPBRK_H - -#include "prism/defines.h" -#include "prism/diagnostic.h" -#include "prism/parser.h" - -#include <stddef.h> -#include <string.h> - -/** - * Here we have rolled our own version of strpbrk. The standard library strpbrk - * has undefined behavior when the source string is not null-terminated. We want - * to support strings that are not null-terminated because pm_parse does not - * have the contract that the string is null-terminated. (This is desirable - * because it means the extension can call pm_parse with the result of a call to - * mmap). - * - * The standard library strpbrk also does not support passing a maximum length - * to search. We want to support this for the reason mentioned above, but we - * also don't want it to stop on null bytes. Ruby actually allows null bytes - * within strings, comments, regular expressions, etc. So we need to be able to - * skip past them. - * - * Finally, we want to support encodings wherein the charset could contain - * characters that are trailing bytes of multi-byte characters. For example, in - * Shift-JIS, the backslash character can be a trailing byte. In that case we - * need to take a slower path and iterate one multi-byte character at a time. - * - * @param parser The parser. - * @param source The source to search. - * @param charset The charset to search for. - * @param length The maximum number of bytes to search. - * @param validate Whether to validate that the source string is valid in the - * current encoding of the parser. - * @return A pointer to the first character in the source string that is in the - * charset, or NULL if no such character exists. - */ -const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate); - -#endif |
