summaryrefslogtreecommitdiff
path: root/prism
diff options
context:
space:
mode:
Diffstat (limited to 'prism')
-rw-r--r--prism/api_pack.c276
-rw-r--r--prism/arena.c117
-rw-r--r--prism/arena.h37
-rw-r--r--prism/buffer.c (renamed from prism/util/pm_buffer.c)57
-rw-r--r--prism/buffer.h52
-rw-r--r--prism/char.c (renamed from prism/util/pm_char.c)70
-rw-r--r--prism/comments.h43
-rw-r--r--prism/compiler/accel.h19
-rw-r--r--prism/compiler/align.h36
-rw-r--r--prism/compiler/exported.h24
-rw-r--r--prism/compiler/fallthrough.h22
-rw-r--r--prism/compiler/filesystem.h32
-rw-r--r--prism/compiler/flex_array.h19
-rw-r--r--prism/compiler/force_inline.h21
-rw-r--r--prism/compiler/format.h25
-rw-r--r--prism/compiler/inline.h17
-rw-r--r--prism/compiler/nodiscard.h22
-rw-r--r--prism/compiler/nonnull.h18
-rw-r--r--prism/compiler/unused.h18
-rw-r--r--prism/config.yml588
-rw-r--r--prism/constant_pool.c (renamed from prism/util/pm_constant_pool.c)212
-rw-r--r--prism/constant_pool.h81
-rw-r--r--prism/defines.h260
-rw-r--r--prism/diagnostic.h93
-rw-r--r--prism/encoding.c248
-rw-r--r--prism/excludes.h29
-rw-r--r--prism/extension.c764
-rw-r--r--prism/extension.h4
-rw-r--r--prism/integer.c (renamed from prism/util/pm_integer.c)53
-rw-r--r--prism/integer.h41
-rw-r--r--prism/internal/allocator.h68
-rw-r--r--prism/internal/allocator_debug.h88
-rw-r--r--prism/internal/arena.h108
-rw-r--r--prism/internal/bit.h42
-rw-r--r--prism/internal/buffer.h91
-rw-r--r--prism/internal/char.h139
-rw-r--r--prism/internal/comments.h20
-rw-r--r--prism/internal/constant_pool.h117
-rw-r--r--prism/internal/encoding.h (renamed from prism/encoding.h)95
-rw-r--r--prism/internal/integer.h68
-rw-r--r--prism/internal/isinf.h16
-rw-r--r--prism/internal/line_offset_list.h34
-rw-r--r--prism/internal/list.h (renamed from prism/util/pm_list.h)57
-rw-r--r--prism/internal/magic_comments.h23
-rw-r--r--prism/internal/memchr.h15
-rw-r--r--prism/internal/node.h32
-rw-r--r--prism/internal/options.h212
-rw-r--r--prism/internal/parser.h958
-rw-r--r--prism/internal/regexp.h41
-rw-r--r--prism/internal/serialize.h34
-rw-r--r--prism/internal/source.h72
-rw-r--r--prism/internal/static_literals.h (renamed from prism/static_literals.h)71
-rw-r--r--prism/internal/stringy.h30
-rw-r--r--prism/internal/strncasecmp.h (renamed from prism/util/pm_strncasecmp.h)20
-rw-r--r--prism/internal/strpbrk.h (renamed from prism/util/pm_strpbrk.h)27
-rw-r--r--prism/internal/tokens.h11
-rw-r--r--prism/json.h32
-rw-r--r--prism/line_offset_list.c100
-rw-r--r--prism/line_offset_list.h61
-rw-r--r--prism/list.c24
-rw-r--r--prism/magic_comments.h35
-rw-r--r--prism/memchr.c (renamed from prism/util/pm_memchr.c)14
-rw-r--r--prism/node.h71
-rw-r--r--prism/options.c204
-rw-r--r--prism/options.h355
-rw-r--r--prism/pack.c509
-rw-r--r--prism/pack.h163
-rw-r--r--prism/parser.c302
-rw-r--r--prism/parser.h1155
-rw-r--r--prism/prettyprint.h15
-rw-r--r--prism/prism.c13026
-rw-r--r--prism/prism.h349
-rw-r--r--prism/regexp.c1015
-rw-r--r--prism/regexp.h43
-rw-r--r--prism/serialize.h96
-rw-r--r--prism/source.c491
-rw-r--r--prism/source.h148
-rw-r--r--prism/srcs.mk160
-rw-r--r--prism/srcs.mk.in52
-rw-r--r--prism/static_literals.c70
-rw-r--r--prism/stream.h28
-rw-r--r--prism/string_query.c166
-rw-r--r--prism/string_query.h63
-rw-r--r--prism/stringy.c91
-rw-r--r--prism/stringy.h72
-rw-r--r--prism/strncasecmp.c (renamed from prism/util/pm_strncasecmp.c)9
-rw-r--r--prism/strpbrk.c439
-rw-r--r--prism/templates/ext/prism/api_node.c.erb92
-rw-r--r--prism/templates/include/prism/ast.h.erb118
-rw-r--r--prism/templates/include/prism/diagnostic.h.erb130
-rw-r--r--prism/templates/include/prism/internal/diagnostic.h.erb60
-rw-r--r--prism/templates/lib/prism/compiler.rb.erb23
-rw-r--r--prism/templates/lib/prism/dispatcher.rb.erb38
-rw-r--r--prism/templates/lib/prism/dot_visitor.rb.erb51
-rw-r--r--prism/templates/lib/prism/dsl.rb.erb47
-rw-r--r--prism/templates/lib/prism/inspect_visitor.rb.erb36
-rw-r--r--prism/templates/lib/prism/mutation_compiler.rb.erb7
-rw-r--r--prism/templates/lib/prism/node.rb.erb413
-rw-r--r--prism/templates/lib/prism/reflection.rb.erb13
-rw-r--r--prism/templates/lib/prism/serialize.rb.erb298
-rw-r--r--prism/templates/lib/prism/visitor.rb.erb26
-rw-r--r--prism/templates/src/diagnostic.c.erb153
-rw-r--r--prism/templates/src/json.c.erb130
-rw-r--r--prism/templates/src/node.c.erb281
-rw-r--r--prism/templates/src/prettyprint.c.erb37
-rw-r--r--prism/templates/src/serialize.c.erb222
-rw-r--r--prism/templates/src/tokens.c.erb (renamed from prism/templates/src/token_type.c.erb)20
-rwxr-xr-xprism/templates/template.rb209
-rw-r--r--prism/util/pm_buffer.h228
-rw-r--r--prism/util/pm_char.h204
-rw-r--r--prism/util/pm_constant_pool.h218
-rw-r--r--prism/util/pm_integer.h126
-rw-r--r--prism/util/pm_list.c49
-rw-r--r--prism/util/pm_memchr.h29
-rw-r--r--prism/util/pm_newline_list.c125
-rw-r--r--prism/util/pm_newline_list.h113
-rw-r--r--prism/util/pm_string.c383
-rw-r--r--prism/util/pm_string.h190
-rw-r--r--prism/util/pm_strpbrk.c206
-rw-r--r--prism/version.h13
120 files changed, 16143 insertions, 13290 deletions
diff --git a/prism/api_pack.c b/prism/api_pack.c
deleted file mode 100644
index 98509ae65c..0000000000
--- a/prism/api_pack.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include "prism/extension.h"
-
-#ifdef PRISM_EXCLUDE_PACK
-
-void
-Init_prism_pack(void) {}
-
-#else
-
-static VALUE rb_cPrism;
-static VALUE rb_cPrismPack;
-static VALUE rb_cPrismPackDirective;
-static VALUE rb_cPrismPackFormat;
-
-static VALUE v3_2_0_symbol;
-static VALUE pack_symbol;
-static VALUE unpack_symbol;
-
-#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
-# define UINT64T2NUM(x) ULL2NUM(x)
-# define NUM2UINT64T(x) (uint64_t)NUM2ULL(x)
-#elif SIZEOF_UINT64_T == SIZEOF_LONG
-# define UINT64T2NUM(x) ULONG2NUM(x)
-# define NUM2UINT64T(x) (uint64_t)NUM2ULONG(x)
-#else
-// error No uint64_t conversion
-#endif
-
-static VALUE
-pack_type_to_symbol(pm_pack_type type) {
- switch (type) {
- case PM_PACK_SPACE:
- return ID2SYM(rb_intern("SPACE"));
- case PM_PACK_COMMENT:
- return ID2SYM(rb_intern("COMMENT"));
- case PM_PACK_INTEGER:
- return ID2SYM(rb_intern("INTEGER"));
- case PM_PACK_UTF8:
- return ID2SYM(rb_intern("UTF8"));
- case PM_PACK_BER:
- return ID2SYM(rb_intern("BER"));
- case PM_PACK_FLOAT:
- return ID2SYM(rb_intern("FLOAT"));
- case PM_PACK_STRING_SPACE_PADDED:
- return ID2SYM(rb_intern("STRING_SPACE_PADDED"));
- case PM_PACK_STRING_NULL_PADDED:
- return ID2SYM(rb_intern("STRING_NULL_PADDED"));
- case PM_PACK_STRING_NULL_TERMINATED:
- return ID2SYM(rb_intern("STRING_NULL_TERMINATED"));
- case PM_PACK_STRING_MSB:
- return ID2SYM(rb_intern("STRING_MSB"));
- case PM_PACK_STRING_LSB:
- return ID2SYM(rb_intern("STRING_LSB"));
- case PM_PACK_STRING_HEX_HIGH:
- return ID2SYM(rb_intern("STRING_HEX_HIGH"));
- case PM_PACK_STRING_HEX_LOW:
- return ID2SYM(rb_intern("STRING_HEX_LOW"));
- case PM_PACK_STRING_UU:
- return ID2SYM(rb_intern("STRING_UU"));
- case PM_PACK_STRING_MIME:
- return ID2SYM(rb_intern("STRING_MIME"));
- case PM_PACK_STRING_BASE64:
- return ID2SYM(rb_intern("STRING_BASE64"));
- case PM_PACK_STRING_FIXED:
- return ID2SYM(rb_intern("STRING_FIXED"));
- case PM_PACK_STRING_POINTER:
- return ID2SYM(rb_intern("STRING_POINTER"));
- case PM_PACK_MOVE:
- return ID2SYM(rb_intern("MOVE"));
- case PM_PACK_BACK:
- return ID2SYM(rb_intern("BACK"));
- case PM_PACK_NULL:
- return ID2SYM(rb_intern("NULL"));
- default:
- return Qnil;
- }
-}
-
-static VALUE
-pack_signed_to_symbol(pm_pack_signed signed_type) {
- switch (signed_type) {
- case PM_PACK_UNSIGNED:
- return ID2SYM(rb_intern("UNSIGNED"));
- case PM_PACK_SIGNED:
- return ID2SYM(rb_intern("SIGNED"));
- case PM_PACK_SIGNED_NA:
- return ID2SYM(rb_intern("SIGNED_NA"));
- default:
- return Qnil;
- }
-}
-
-static VALUE
-pack_endian_to_symbol(pm_pack_endian endian) {
- switch (endian) {
- case PM_PACK_AGNOSTIC_ENDIAN:
- return ID2SYM(rb_intern("AGNOSTIC_ENDIAN"));
- case PM_PACK_LITTLE_ENDIAN:
- return ID2SYM(rb_intern("LITTLE_ENDIAN"));
- case PM_PACK_BIG_ENDIAN:
- return ID2SYM(rb_intern("BIG_ENDIAN"));
- case PM_PACK_NATIVE_ENDIAN:
- return ID2SYM(rb_intern("NATIVE_ENDIAN"));
- case PM_PACK_ENDIAN_NA:
- return ID2SYM(rb_intern("ENDIAN_NA"));
- default:
- return Qnil;
- }
-}
-
-static VALUE
-pack_size_to_symbol(pm_pack_size size) {
- switch (size) {
- case PM_PACK_SIZE_SHORT:
- return ID2SYM(rb_intern("SIZE_SHORT"));
- case PM_PACK_SIZE_INT:
- return ID2SYM(rb_intern("SIZE_INT"));
- case PM_PACK_SIZE_LONG:
- return ID2SYM(rb_intern("SIZE_LONG"));
- case PM_PACK_SIZE_LONG_LONG:
- return ID2SYM(rb_intern("SIZE_LONG_LONG"));
- case PM_PACK_SIZE_8:
- return ID2SYM(rb_intern("SIZE_8"));
- case PM_PACK_SIZE_16:
- return ID2SYM(rb_intern("SIZE_16"));
- case PM_PACK_SIZE_32:
- return ID2SYM(rb_intern("SIZE_32"));
- case PM_PACK_SIZE_64:
- return ID2SYM(rb_intern("SIZE_64"));
- case PM_PACK_SIZE_P:
- return ID2SYM(rb_intern("SIZE_P"));
- case PM_PACK_SIZE_NA:
- return ID2SYM(rb_intern("SIZE_NA"));
- default:
- return Qnil;
- }
-}
-
-static VALUE
-pack_length_type_to_symbol(pm_pack_length_type length_type) {
- switch (length_type) {
- case PM_PACK_LENGTH_FIXED:
- return ID2SYM(rb_intern("LENGTH_FIXED"));
- case PM_PACK_LENGTH_MAX:
- return ID2SYM(rb_intern("LENGTH_MAX"));
- case PM_PACK_LENGTH_RELATIVE:
- return ID2SYM(rb_intern("LENGTH_RELATIVE"));
- case PM_PACK_LENGTH_NA:
- return ID2SYM(rb_intern("LENGTH_NA"));
- default:
- return Qnil;
- }
-}
-
-static VALUE
-pack_encoding_to_ruby(pm_pack_encoding encoding) {
- int index;
- switch (encoding) {
- case PM_PACK_ENCODING_ASCII_8BIT:
- index = rb_ascii8bit_encindex();
- break;
- case PM_PACK_ENCODING_US_ASCII:
- index = rb_usascii_encindex();
- break;
- case PM_PACK_ENCODING_UTF_8:
- index = rb_utf8_encindex();
- break;
- default:
- return Qnil;
- }
- return rb_enc_from_encoding(rb_enc_from_index(index));
-}
-
-/**
- * call-seq:
- * Pack::parse(version, variant, source) -> Format
- *
- * Parse the given source and return a format object.
- */
-static VALUE
-pack_parse(VALUE self, VALUE version_symbol, VALUE variant_symbol, VALUE format_string) {
- if (version_symbol != v3_2_0_symbol) {
- rb_raise(rb_eArgError, "invalid version");
- }
-
- pm_pack_variant variant;
- if (variant_symbol == pack_symbol) {
- variant = PM_PACK_VARIANT_PACK;
- } else if (variant_symbol == unpack_symbol) {
- variant = PM_PACK_VARIANT_UNPACK;
- } else {
- rb_raise(rb_eArgError, "invalid variant");
- }
-
- StringValue(format_string);
-
- const char *format = RSTRING_PTR(format_string);
- const char *format_end = format + RSTRING_LEN(format_string);
- pm_pack_encoding encoding = PM_PACK_ENCODING_START;
-
- VALUE directives_array = rb_ary_new();
-
- while (format < format_end) {
- pm_pack_type type;
- pm_pack_signed signed_type;
- pm_pack_endian endian;
- pm_pack_size size;
- pm_pack_length_type length_type;
- uint64_t length;
-
- const char *directive_start = format;
-
- pm_pack_result parse_result = pm_pack_parse(variant, &format, format_end, &type, &signed_type, &endian,
- &size, &length_type, &length, &encoding);
-
- const char *directive_end = format;
-
- switch (parse_result) {
- case PM_PACK_OK:
- break;
- case PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE:
- rb_raise(rb_eArgError, "unsupported directive");
- case PM_PACK_ERROR_UNKNOWN_DIRECTIVE:
- rb_raise(rb_eArgError, "unsupported directive");
- case PM_PACK_ERROR_LENGTH_TOO_BIG:
- rb_raise(rb_eRangeError, "pack length too big");
- case PM_PACK_ERROR_BANG_NOT_ALLOWED:
- rb_raise(rb_eRangeError, "bang not allowed");
- case PM_PACK_ERROR_DOUBLE_ENDIAN:
- rb_raise(rb_eRangeError, "double endian");
- default:
- rb_bug("parse result");
- }
-
- if (type == PM_PACK_END) {
- break;
- }
-
- VALUE directive_args[9] = {
- version_symbol,
- variant_symbol,
- rb_usascii_str_new(directive_start, directive_end - directive_start),
- pack_type_to_symbol(type),
- pack_signed_to_symbol(signed_type),
- pack_endian_to_symbol(endian),
- pack_size_to_symbol(size),
- pack_length_type_to_symbol(length_type),
- UINT64T2NUM(length)
- };
-
- rb_ary_push(directives_array, rb_class_new_instance(9, directive_args, rb_cPrismPackDirective));
- }
-
- VALUE format_args[2];
- format_args[0] = directives_array;
- format_args[1] = pack_encoding_to_ruby(encoding);
- return rb_class_new_instance(2, format_args, rb_cPrismPackFormat);
-}
-
-/**
- * The function that gets called when Ruby initializes the prism extension.
- */
-void
-Init_prism_pack(void) {
- rb_cPrism = rb_define_module("Prism");
- rb_cPrismPack = rb_define_module_under(rb_cPrism, "Pack");
- rb_cPrismPackDirective = rb_define_class_under(rb_cPrismPack, "Directive", rb_cObject);
- rb_cPrismPackFormat = rb_define_class_under(rb_cPrismPack, "Format", rb_cObject);
- rb_define_singleton_method(rb_cPrismPack, "parse", pack_parse, 3);
-
- v3_2_0_symbol = ID2SYM(rb_intern("v3_2_0"));
- pack_symbol = ID2SYM(rb_intern("pack"));
- unpack_symbol = ID2SYM(rb_intern("unpack"));
-}
-
-#endif
diff --git a/prism/arena.c b/prism/arena.c
new file mode 100644
index 0000000000..64a731649d
--- /dev/null
+++ b/prism/arena.c
@@ -0,0 +1,117 @@
+#include "prism/internal/arena.h"
+
+#include "prism/internal/allocator.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * Compute the block allocation size using offsetof so it is correct regardless
+ * of PM_FLEX_ARRAY_LENGTH.
+ */
+#define PM_ARENA_BLOCK_SIZE(data_size) (offsetof(pm_arena_block_t, data) + (data_size))
+
+/** Initial block data size: 8 KB. */
+#define PM_ARENA_INITIAL_SIZE 8192
+
+/** Double the block size every this many blocks. */
+#define PM_ARENA_GROWTH_INTERVAL 8
+
+/** Maximum block data size: 1 MB. */
+#define PM_ARENA_MAX_SIZE (1024 * 1024)
+
+/**
+ * Compute the data size for the next block.
+ */
+static size_t
+pm_arena_next_block_size(const pm_arena_t *arena, size_t min_size) {
+ size_t size = PM_ARENA_INITIAL_SIZE;
+
+ for (size_t exp = PM_ARENA_GROWTH_INTERVAL; exp <= arena->block_count; exp += PM_ARENA_GROWTH_INTERVAL) {
+ if (size < PM_ARENA_MAX_SIZE) size *= 2;
+ }
+
+ return size > min_size ? size : min_size;
+}
+
+/**
+ * Allocate a new block with the given data capacity and initial usage, link it
+ * into the arena, and return it. Aborts on allocation failure.
+ */
+static pm_arena_block_t *
+pm_arena_block_new(pm_arena_t *arena, size_t data_size, size_t initial_used) {
+ assert(initial_used <= data_size);
+ pm_arena_block_t *block = (pm_arena_block_t *) xmalloc(PM_ARENA_BLOCK_SIZE(data_size));
+
+ if (block == NULL) {
+ fprintf(stderr, "prism: out of memory; aborting\n");
+ abort();
+ }
+
+ block->capacity = data_size;
+ block->used = initial_used;
+ block->prev = arena->current;
+ arena->current = block;
+ arena->block_count++;
+
+ return block;
+}
+
+/**
+ * Ensure the arena has at least `capacity` bytes available in its current
+ * block, allocating a new block if necessary, so callers can pre-size it and
+ * avoid repeated small block allocations. No-op if capacity <= PM_ARENA_INITIAL_SIZE.
+ */
+void
+pm_arena_reserve(pm_arena_t *arena, size_t capacity) {
+ if (capacity <= PM_ARENA_INITIAL_SIZE) return;
+ if (arena->current != NULL && (arena->current->capacity - arena->current->used) >= capacity) return;
+ pm_arena_block_new(arena, capacity, 0);
+}
+
+/**
+ * Slow path for pm_arena_alloc: allocate a new block and return a pointer to
+ * the first `size` bytes. Called when the current block has insufficient space.
+ */
+void *
+pm_arena_alloc_slow(pm_arena_t *arena, size_t size) {
+ size_t block_data_size = pm_arena_next_block_size(arena, size);
+ pm_arena_block_t *block = pm_arena_block_new(arena, block_data_size, size);
+ return block->data;
+}
+
+/**
+ * Returns a newly allocated and initialized arena.
+ */
+pm_arena_t *
+pm_arena_new(void) {
+ pm_arena_t *arena = (pm_arena_t *) xcalloc(1, sizeof(pm_arena_t));
+ if (arena == NULL) abort();
+ return arena;
+}
+
+/**
+ * Free all blocks in the arena and reset the arena struct to all zeros.
+ */
+void
+pm_arena_cleanup(pm_arena_t *arena) {
+ pm_arena_block_t *block = arena->current;
+
+ while (block != NULL) {
+ pm_arena_block_t *prev = block->prev;
+ xfree_sized(block, PM_ARENA_BLOCK_SIZE(block->capacity));
+ block = prev;
+ }
+
+ *arena = (pm_arena_t) { 0 };
+}
+
+/**
+ * Frees both the held memory and the arena itself.
+ */
+void
+pm_arena_free(pm_arena_t *arena) {
+ pm_arena_cleanup(arena);
+ xfree_sized(arena, sizeof(pm_arena_t));
+}
diff --git a/prism/arena.h b/prism/arena.h
new file mode 100644
index 0000000000..e1fa8fc6ad
--- /dev/null
+++ b/prism/arena.h
@@ -0,0 +1,37 @@
+/**
+ * @file arena.h
+ *
+ * A bump allocator for the prism parser.
+ */
+#ifndef PRISM_ARENA_H
+#define PRISM_ARENA_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+
+/**
+ * An opaque pointer to an arena that is used for allocations.
+ */
+typedef struct pm_arena_t pm_arena_t;
+
+/**
+ * Returns a newly allocated and initialized arena. If the arena cannot be
+ * allocated, this function aborts the process.
+ *
+ * @returns A pointer to the newly allocated arena. It is the responsibility of
+ * the caller to free the arena using pm_arena_free when it is no longer
+ * needed.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_arena_t * pm_arena_new(void);
+
+/**
+ * Frees both the held memory and the arena itself.
+ *
+ * @param arena The arena to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_arena_free(pm_arena_t *arena) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_buffer.c b/prism/buffer.c
index 2136a7c43e..cb3b9a4fe8 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/buffer.c
@@ -1,31 +1,38 @@
-#include "prism/util/pm_buffer.h"
+#include "prism/internal/buffer.h"
-/**
- * Return the size of the pm_buffer_t struct.
- */
-size_t
-pm_buffer_sizeof(void) {
- return sizeof(pm_buffer_t);
-}
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/char.h"
+#include "prism/internal/allocator.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
/**
* Initialize a pm_buffer_t with the given capacity.
*/
-bool
-pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity) {
+void
+pm_buffer_init(pm_buffer_t *buffer, size_t capacity) {
buffer->length = 0;
buffer->capacity = capacity;
buffer->value = (char *) xmalloc(capacity);
- return buffer->value != NULL;
+ if (buffer->value == NULL) abort();
}
/**
- * Initialize a pm_buffer_t with its default values.
+ * Allocate and initialize a new buffer.
*/
-bool
-pm_buffer_init(pm_buffer_t *buffer) {
- return pm_buffer_init_capacity(buffer, 1024);
+pm_buffer_t *
+pm_buffer_new(void) {
+ pm_buffer_t *buffer = (pm_buffer_t *) xmalloc(sizeof(pm_buffer_t));
+ if (buffer == NULL) abort();
+
+ pm_buffer_init(buffer, 1024);
+ return buffer;
}
/**
@@ -47,9 +54,10 @@ pm_buffer_length(const pm_buffer_t *buffer) {
/**
* Append the given amount of space to the buffer.
*/
-static inline bool
+static PRISM_INLINE bool
pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
size_t next_length = buffer->length + length;
+ const size_t original_capacity = buffer->capacity;
if (next_length > buffer->capacity) {
if (buffer->capacity == 0) {
@@ -60,7 +68,7 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
buffer->capacity *= 2;
}
- buffer->value = xrealloc(buffer->value, buffer->capacity);
+ buffer->value = xrealloc_sized(buffer->value, buffer->capacity, original_capacity);
if (buffer->value == NULL) return false;
}
@@ -71,7 +79,7 @@ pm_buffer_append_length(pm_buffer_t *buffer, size_t length) {
/**
* Append a generic pointer to memory to the buffer.
*/
-static inline void
+static PRISM_INLINE void
pm_buffer_append(pm_buffer_t *buffer, const void *source, size_t length) {
size_t cursor = buffer->length;
if (pm_buffer_append_length(buffer, length)) {
@@ -349,9 +357,18 @@ pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t le
}
/**
- * Free the memory associated with the buffer.
+ * Free the memory held by the buffer.
+ */
+void
+pm_buffer_cleanup(pm_buffer_t *buffer) {
+ xfree_sized(buffer->value, buffer->capacity);
+}
+
+/**
+ * Free both the memory held by the buffer and the buffer itself.
*/
void
pm_buffer_free(pm_buffer_t *buffer) {
- xfree(buffer->value);
+ pm_buffer_cleanup(buffer);
+ xfree_sized(buffer, sizeof(pm_buffer_t));
}
diff --git a/prism/buffer.h b/prism/buffer.h
new file mode 100644
index 0000000000..24b572d2c3
--- /dev/null
+++ b/prism/buffer.h
@@ -0,0 +1,52 @@
+/**
+ * @file buffer.h
+ *
+ * A wrapper around a contiguous block of allocated memory.
+ */
+#ifndef PRISM_BUFFER_H
+#define PRISM_BUFFER_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+
+/**
+ * A wrapper around a contiguous block of allocated memory.
+ */
+typedef struct pm_buffer_t pm_buffer_t;
+
+/**
+ * Allocate and initialize a new buffer. If the buffer cannot be allocated, this
+ * function will abort the process.
+ *
+ * @returns A pointer to the initialized buffer. The caller is responsible for
+ * freeing the buffer with pm_buffer_free.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_buffer_t * pm_buffer_new(void);
+
+/**
+ * Free both the memory held by the buffer and the buffer itself.
+ *
+ * @param buffer The buffer to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer) PRISM_NONNULL(1);
+
+/**
+ * Return the value of the buffer.
+ *
+ * @param buffer The buffer to get the value of.
+ * @returns The value of the buffer.
+ */
+PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer) PRISM_NONNULL(1);
+
+/**
+ * Return the length of the buffer.
+ *
+ * @param buffer The buffer to get the length of.
+ * @returns The length of the buffer.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_char.c b/prism/char.c
index a51dc11645..08e457aa1f 100644
--- a/prism/util/pm_char.c
+++ b/prism/char.c
@@ -1,7 +1,8 @@
-#include "prism/util/pm_char.h"
+#include "prism/internal/char.h"
+
+#include "prism/compiler/inline.h"
+#include "prism/internal/line_offset_list.h"
-#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
-#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)
#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
@@ -13,7 +14,7 @@
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
-static const uint8_t pm_byte_table[256] = {
+const uint8_t pm_byte_table[256] = {
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -57,7 +58,7 @@ static const uint8_t pm_number_table[256] = {
* Returns the number of characters at the start of the string that match the
* given kind. Disallows searching past the given maximum number of characters.
*/
-static inline size_t
+static PRISM_INLINE size_t
pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
if (length <= 0) return 0;
@@ -83,15 +84,15 @@ pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
* searching past the given maximum number of characters.
*/
size_t
-pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
+pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset) {
if (length <= 0) return 0;
- size_t size = 0;
- size_t maximum = (size_t) length;
+ uint32_t size = 0;
+ uint32_t maximum = (uint32_t) length;
while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_WHITESPACE)) {
if (string[size] == '\n') {
- pm_newline_list_append(newline_list, string + size);
+ pm_line_offset_list_append(arena, line_offsets, start_offset + size + 1);
}
size++;
@@ -101,15 +102,6 @@ pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newlin
}
/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- */
-size_t
-pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
- return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
-
-/**
* Returns the number of characters at the start of the string that are regexp
* options. Disallows searching past the given maximum number of characters.
*/
@@ -118,36 +110,13 @@ pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
return pm_strspn_char_kind(string, length, PRISM_CHAR_BIT_REGEXP_OPTION);
}
-/**
- * Returns true if the given character matches the given kind.
- */
-static inline bool
-pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
- return (pm_byte_table[b] & kind) != 0;
-}
-
-/**
- * Returns true if the given character is a whitespace character.
- */
-bool
-pm_char_is_whitespace(const uint8_t b) {
- return pm_char_is_char_kind(b, PRISM_CHAR_BIT_WHITESPACE);
-}
-
-/**
- * Returns true if the given character is an inline whitespace character.
- */
-bool
-pm_char_is_inline_whitespace(const uint8_t b) {
- return pm_char_is_char_kind(b, PRISM_CHAR_BIT_INLINE_WHITESPACE);
-}
/**
* Scan through the string and return the number of characters at the start of
* the string that match the given kind. Disallows searching past the given
* maximum number of characters.
*/
-static inline size_t
+static PRISM_INLINE size_t
pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
if (length <= 0) return 0;
@@ -166,7 +135,7 @@ pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
* Additionally, report the location of the last invalid underscore character
* found in the string through the out invalid parameter.
*/
-static inline size_t
+static PRISM_INLINE size_t
pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
if (length <= 0) return 0;
@@ -267,7 +236,7 @@ pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint
/**
* Returns true if the given character matches the given kind.
*/
-static inline bool
+static PRISM_INLINE bool
pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
return (pm_number_table[b] & kind) != 0;
}
@@ -303,16 +272,3 @@ bool
pm_char_is_hexadecimal_digit(const uint8_t b) {
return pm_char_is_number_kind(b, PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT);
}
-
-#undef PRISM_CHAR_BIT_WHITESPACE
-#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
-#undef PRISM_CHAR_BIT_REGEXP_OPTION
-
-#undef PRISM_NUMBER_BIT_BINARY_DIGIT
-#undef PRISM_NUMBER_BIT_BINARY_NUMBER
-#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
-#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
-#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
-#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
-#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
-#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/comments.h b/prism/comments.h
new file mode 100644
index 0000000000..2270d53889
--- /dev/null
+++ b/prism/comments.h
@@ -0,0 +1,43 @@
+/**
+ * @file comments.h
+ *
+ * Types and functions related to comments found during parsing.
+ */
+#ifndef PRISM_COMMENTS_H
+#define PRISM_COMMENTS_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+
+#include <stddef.h>
+
+/** This is the type of a comment that we've found while parsing. */
+typedef enum {
+ PM_COMMENT_INLINE,
+ PM_COMMENT_EMBDOC
+} pm_comment_type_t;
+
+/** An opaque pointer to a comment found while parsing. */
+typedef struct pm_comment_t pm_comment_t;
+
+/**
+ * Returns the location associated with the given comment.
+ *
+ * @param comment the comment whose location we want to get
+ * @returns the location associated with the given comment
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_comment_location(const pm_comment_t *comment) PRISM_NONNULL(1);
+
+/**
+ * Returns the type associated with the given comment.
+ *
+ * @param comment the comment whose type we want to get
+ * @returns the type associated with the given comment. This can either be
+ * PM_COMMENT_INLINE or PM_COMMENT_EMBDOC.
+ */
+PRISM_EXPORTED_FUNCTION pm_comment_type_t pm_comment_type(const pm_comment_t *comment) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/compiler/accel.h b/prism/compiler/accel.h
new file mode 100644
index 0000000000..be23236d1d
--- /dev/null
+++ b/prism/compiler/accel.h
@@ -0,0 +1,19 @@
+/**
+ * @file compiler/accel.h
+ */
+#ifndef PRISM_COMPILER_ACCEL_H
+#define PRISM_COMPILER_ACCEL_H
+
+/**
+ * Platform detection for SIMD/fast-path implementations. At most one of these
+ * macros is defined, selecting the best available vectorization strategy.
+ */
+#if (defined(__aarch64__) && defined(__ARM_NEON)) || (defined(_MSC_VER) && defined(_M_ARM64))
+# define PRISM_HAS_NEON
+#elif (defined(__x86_64__) && defined(__SSSE3__)) || (defined(_MSC_VER) && defined(_M_X64))
+# define PRISM_HAS_SSSE3
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define PRISM_HAS_SWAR
+#endif
+
+#endif
diff --git a/prism/compiler/align.h b/prism/compiler/align.h
new file mode 100644
index 0000000000..22cb49a48c
--- /dev/null
+++ b/prism/compiler/align.h
@@ -0,0 +1,36 @@
+/**
+ * @file compiler/align.h
+ */
+#ifndef PRISM_COMPILER_ALIGN_H
+#define PRISM_COMPILER_ALIGN_H
+
+/**
+ * Compiler-agnostic macros for specifying alignment of types and variables.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */
+ /** Specify alignment for a type or variable. */
+ #define PRISM_ALIGNAS _Alignas
+
+ /** Get the alignment requirement of a type. */
+ #define PRISM_ALIGNOF _Alignof
+#elif defined(__GNUC__) || defined(__clang__)
+ /** Specify alignment for a type or variable. */
+ #define PRISM_ALIGNAS(size) __attribute__((aligned(size)))
+
+ /** Get the alignment requirement of a type. */
+ #define PRISM_ALIGNOF(type) __alignof__(type)
+#elif defined(_MSC_VER)
+ /** Specify alignment for a type or variable. */
+ #define PRISM_ALIGNAS(size) __declspec(align(size))
+
+ /** Get the alignment requirement of a type. */
+ #define PRISM_ALIGNOF(type) __alignof(type)
+#else
+ /** Expands to nothing because this platform does not support specifying alignment. */
+ #define PRISM_ALIGNAS(size)
+
+ /** Fallback to sizeof as alignment requirement of a type. */
+ #define PRISM_ALIGNOF(type) sizeof(type)
+#endif
+
+#endif
diff --git a/prism/compiler/exported.h b/prism/compiler/exported.h
new file mode 100644
index 0000000000..823773ecbb
--- /dev/null
+++ b/prism/compiler/exported.h
@@ -0,0 +1,24 @@
+/**
+ * @file compiler/exported.h
+ */
+#ifndef PRISM_COMPILER_EXPORTED_H
+#define PRISM_COMPILER_EXPORTED_H
+
+/**
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
+ * need to mark certain functions as being publicly visible. This macro does
+ * that in a compiler-agnostic way.
+ */
+#ifndef PRISM_EXPORTED_FUNCTION
+# ifdef PRISM_EXPORT_SYMBOLS
+# ifdef _WIN32
+# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
+# else
+# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
+# endif
+# else
+# define PRISM_EXPORTED_FUNCTION
+# endif
+#endif
+
+#endif
diff --git a/prism/compiler/fallthrough.h b/prism/compiler/fallthrough.h
new file mode 100644
index 0000000000..ce1b450e8a
--- /dev/null
+++ b/prism/compiler/fallthrough.h
@@ -0,0 +1,22 @@
+/**
+ * @file compiler/fallthrough.h
+ */
+#ifndef PRISM_COMPILER_FALLTHROUGH_H
+#define PRISM_COMPILER_FALLTHROUGH_H
+
+/**
+ * We use -Wimplicit-fallthrough to guard potentially unintended fall-through
+ * between cases of a switch. Use PRISM_FALLTHROUGH to explicitly annotate cases
+ * where the fallthrough is intentional.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L /* C23 or later */
+ #define PRISM_FALLTHROUGH [[fallthrough]];
+#elif defined(__GNUC__) || defined(__clang__)
+ #define PRISM_FALLTHROUGH __attribute__((fallthrough));
+#elif defined(_MSC_VER)
+ #define PRISM_FALLTHROUGH __fallthrough;
+#else
+ #define PRISM_FALLTHROUGH
+#endif
+
+#endif
diff --git a/prism/compiler/filesystem.h b/prism/compiler/filesystem.h
new file mode 100644
index 0000000000..f988909db8
--- /dev/null
+++ b/prism/compiler/filesystem.h
@@ -0,0 +1,32 @@
+/**
+ * @file compiler/filesystem.h
+ *
+ * Platform detection for mmap and filesystem support.
+ */
+#ifndef PRISM_COMPILER_FILESYSTEM_H
+#define PRISM_COMPILER_FILESYSTEM_H
+
+/**
+ * In general, libc for embedded systems does not support memory-mapped files.
+ * If the target platform is POSIX or Windows, we can map a file in memory and
+ * read it in a more efficient manner.
+ */
+#ifdef _WIN32
+# define PRISM_HAS_MMAP
+#else
+# include <unistd.h>
+# ifdef _POSIX_MAPPED_FILES
+# define PRISM_HAS_MMAP
+# endif
+#endif
+
+/**
+ * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
+ * related code from the library. All filesystem related code should be guarded
+ * by PRISM_HAS_FILESYSTEM.
+ */
+#ifndef PRISM_HAS_NO_FILESYSTEM
+# define PRISM_HAS_FILESYSTEM
+#endif
+
+#endif
diff --git a/prism/compiler/flex_array.h b/prism/compiler/flex_array.h
new file mode 100644
index 0000000000..7504b5fdd3
--- /dev/null
+++ b/prism/compiler/flex_array.h
@@ -0,0 +1,19 @@
+/**
+ * @file compiler/flex_array.h
+ */
+#ifndef PRISM_COMPILER_FLEX_ARRAY_H
+#define PRISM_COMPILER_FLEX_ARRAY_H
+
+/**
+ * A helper macro for defining a flexible array member. C99 supports `data[]`, GCC
+ * supports `data[0]` as an extension, and older compilers require `data[1]`.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
+ #define PM_FLEX_ARRAY_LENGTH /* data[] */
+#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)
+ #define PM_FLEX_ARRAY_LENGTH 0 /* data[0] */
+#else
+ #define PM_FLEX_ARRAY_LENGTH 1 /* data[1] */
+#endif
+
+#endif
diff --git a/prism/compiler/force_inline.h b/prism/compiler/force_inline.h
new file mode 100644
index 0000000000..e189d592d6
--- /dev/null
+++ b/prism/compiler/force_inline.h
@@ -0,0 +1,21 @@
+/**
+ * @file compiler/force_inline.h
+ */
+#ifndef PRISM_COMPILER_FORCE_INLINE_H
+#define PRISM_COMPILER_FORCE_INLINE_H
+
+#include "prism/compiler/inline.h"
+
+/**
+ * Force a function to be inlined at every call site. Use sparingly — only for
+ * small, hot functions where the compiler's heuristics fail to inline.
+ */
+#if defined(_MSC_VER)
+# define PRISM_FORCE_INLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__)
+# define PRISM_FORCE_INLINE PRISM_INLINE __attribute__((always_inline))
+#else
+# define PRISM_FORCE_INLINE PRISM_INLINE
+#endif
+
+#endif
diff --git a/prism/compiler/format.h b/prism/compiler/format.h
new file mode 100644
index 0000000000..32f4c3c6d7
--- /dev/null
+++ b/prism/compiler/format.h
@@ -0,0 +1,25 @@
+/**
+ * @file compiler/format.h
+ */
+#ifndef PRISM_COMPILER_FORMAT_H
+#define PRISM_COMPILER_FORMAT_H
+
+/**
+ * Certain compilers support specifying that a function accepts variadic
+ * parameters that look like printf format strings to provide a better developer
+ * experience when someone is using the function. This macro does that in a
+ * compiler-agnostic way.
+ */
+#if defined(__GNUC__)
+# if defined(__MINGW_PRINTF_FORMAT)
+# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((format(__MINGW_PRINTF_FORMAT, fmt_idx_, arg_idx_)))
+# else
+# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((format(printf, fmt_idx_, arg_idx_)))
+# endif
+#elif defined(__clang__)
+# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_) __attribute__((__format__(__printf__, fmt_idx_, arg_idx_)))
+#else
+# define PRISM_ATTRIBUTE_FORMAT(fmt_idx_, arg_idx_)
+#endif
+
+#endif
diff --git a/prism/compiler/inline.h b/prism/compiler/inline.h
new file mode 100644
index 0000000000..856a375691
--- /dev/null
+++ b/prism/compiler/inline.h
@@ -0,0 +1,17 @@
+/**
+ * @file compiler/inline.h
+ */
+#ifndef PRISM_COMPILER_INLINE_H
+#define PRISM_COMPILER_INLINE_H
+
+/**
+ * Old Visual Studio versions do not support the inline keyword, so we need to
+ * define it to be __inline.
+ */
+#if defined(_MSC_VER) && !defined(inline)
+# define PRISM_INLINE __inline
+#else
+# define PRISM_INLINE inline
+#endif
+
+#endif
diff --git a/prism/compiler/nodiscard.h b/prism/compiler/nodiscard.h
new file mode 100644
index 0000000000..ccd6c00719
--- /dev/null
+++ b/prism/compiler/nodiscard.h
@@ -0,0 +1,22 @@
+/**
+ * @file compiler/nodiscard.h
+ */
+#ifndef PRISM_COMPILER_NODISCARD_H
+#define PRISM_COMPILER_NODISCARD_H
+
+/**
+ * Mark the return value of a function as important so that the compiler warns
+ * if a caller ignores it. This is useful for functions that return error codes
+ * or allocated resources that must be freed.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+# define PRISM_NODISCARD [[nodiscard]]
+#elif defined(__GNUC__) || defined(__clang__)
+# define PRISM_NODISCARD __attribute__((__warn_unused_result__))
+#elif defined(_MSC_VER)
+# define PRISM_NODISCARD _Check_return_
+#else
+# define PRISM_NODISCARD
+#endif
+
+#endif
diff --git a/prism/compiler/nonnull.h b/prism/compiler/nonnull.h
new file mode 100644
index 0000000000..9d19355665
--- /dev/null
+++ b/prism/compiler/nonnull.h
@@ -0,0 +1,18 @@
+/**
+ * @file compiler/nonnull.h
+ */
+#ifndef PRISM_COMPILER_NONNULL_H
+#define PRISM_COMPILER_NONNULL_H
+
+/**
+ * Mark the parameters of a function as non-null. This allows the compiler to
+ * warn if a caller passes NULL for a parameter that should never be NULL. The
+ * arguments are the 1-based indices of the parameters.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+# define PRISM_NONNULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
+#else
+# define PRISM_NONNULL(...)
+#endif
+
+#endif
diff --git a/prism/compiler/unused.h b/prism/compiler/unused.h
new file mode 100644
index 0000000000..6a9e125dde
--- /dev/null
+++ b/prism/compiler/unused.h
@@ -0,0 +1,18 @@
+/**
+ * @file compiler/unused.h
+ */
+#ifndef PRISM_COMPILER_UNUSED_H
+#define PRISM_COMPILER_UNUSED_H
+
+/**
+ * GCC will warn if you specify a function or parameter that is unused at
+ * runtime. This macro allows you to mark a function or parameter as unused in a
+ * compiler-agnostic way.
+ */
+#if defined(__GNUC__)
+# define PRISM_UNUSED __attribute__((unused))
+#else
+# define PRISM_UNUSED
+#endif
+
+#endif
diff --git a/prism/config.yml b/prism/config.yml
index 3d5eee190f..bbbc5f3d33 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -17,6 +17,8 @@ errors:
- ARGUMENT_FORWARDING_UNBOUND
- ARGUMENT_NO_FORWARDING_AMPERSAND
- ARGUMENT_NO_FORWARDING_ELLIPSES
+ - ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA
+ - ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK
- ARGUMENT_NO_FORWARDING_STAR
- ARGUMENT_NO_FORWARDING_STAR_STAR
- ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT
@@ -60,7 +62,9 @@ errors:
- CONDITIONAL_WHILE_PREDICATE
- CONSTANT_PATH_COLON_COLON_CONSTANT
- DEF_ENDLESS
+ - DEF_ENDLESS_PARAMETERS
- DEF_ENDLESS_SETTER
+ - DEF_ENDLESS_DO_BLOCK
- DEF_NAME
- DEF_PARAMS_TERM
- DEF_PARAMS_TERM_PAREN
@@ -101,6 +105,8 @@ errors:
- EXPECT_FOR_DELIMITER
- EXPECT_IDENT_REQ_PARAMETER
- EXPECT_IN_DELIMITER
+ - EXPECT_LPAREN_AFTER_NOT_LPAREN
+ - EXPECT_LPAREN_AFTER_NOT_OTHER
- EXPECT_LPAREN_REQ_PARAMETER
- EXPECT_MESSAGE
- EXPECT_RBRACKET
@@ -216,6 +222,7 @@ errors:
- PARAMETER_WILD_LOOSE_COMMA
- PATTERN_ARRAY_MULTIPLE_RESTS
- PATTERN_CAPTURE_DUPLICATE
+ - PATTERN_CAPTURE_IN_ALTERNATIVE
- PATTERN_EXPRESSION_AFTER_BRACKET
- PATTERN_EXPRESSION_AFTER_COMMA
- PATTERN_EXPRESSION_AFTER_HROCKET
@@ -241,7 +248,9 @@ errors:
- PATTERN_TERM_PAREN
- PIPEPIPEEQ_MULTI_ASSIGN
- REGEXP_ENCODING_OPTION_MISMATCH
+ - REGEXP_ESCAPED_NON_ASCII_IN_UTF8
- REGEXP_INCOMPAT_CHAR_ENCODING
+ - REGEXP_INVALID_CHAR_PROPERTY
- REGEXP_INVALID_UNICODE_RANGE
- REGEXP_NON_ESCAPED_MBC
- REGEXP_PARSE_ERROR
@@ -277,6 +286,7 @@ errors:
- UNEXPECTED_INDEX_KEYWORDS
- UNEXPECTED_LABEL
- UNEXPECTED_MULTI_WRITE
+ - UNEXPECTED_PARAMETER_DEFAULT_VALUE
- UNEXPECTED_RANGE_OPERATOR
- UNEXPECTED_SAFE_NAVIGATION
- UNEXPECTED_TOKEN_CLOSE_CONTEXT
@@ -320,13 +330,44 @@ warnings:
- UNUSED_LOCAL_VARIABLE
- VOID_STATEMENT
tokens:
+ # The order of the tokens at the beginning is important, because we use them
+ # for a lookup table.
- name: EOF
value: 1
comment: final token in the file
- - name: MISSING
- comment: "a token that was expected but not found"
- - name: NOT_PROVIDED
- comment: "a token that was not present but it is okay"
+ - name: BRACE_RIGHT
+ comment: "}"
+ - name: COMMA
+ comment: ","
+ - name: EMBEXPR_END
+ comment: "}"
+ - name: KEYWORD_DO
+ comment: "do"
+ - name: KEYWORD_ELSE
+ comment: "else"
+ - name: KEYWORD_ELSIF
+ comment: "elsif"
+ - name: KEYWORD_END
+ comment: "end"
+ - name: KEYWORD_ENSURE
+ comment: "ensure"
+ - name: KEYWORD_IN
+ comment: "in"
+ - name: KEYWORD_RESCUE
+ comment: "rescue"
+ - name: KEYWORD_THEN
+ comment: "then"
+ - name: KEYWORD_WHEN
+ comment: "when"
+ - name: NEWLINE
+ comment: "a newline character outside of other tokens"
+ - name: PARENTHESIS_RIGHT
+ comment: ")"
+ - name: PIPE
+ comment: "|"
+ - name: SEMICOLON
+ comment: ";"
+ # Tokens from here on are not used for lookup, and can be in any order.
- name: AMPERSAND
comment: "&"
- name: AMPERSAND_AMPERSAND
@@ -349,8 +390,6 @@ tokens:
comment: "!~"
- name: BRACE_LEFT
comment: "{"
- - name: BRACE_RIGHT
- comment: "}"
- name: BRACKET_LEFT
comment: "["
- name: BRACKET_LEFT_ARRAY
@@ -373,8 +412,6 @@ tokens:
comment: ":"
- name: COLON_COLON
comment: "::"
- - name: COMMA
- comment: ","
- name: COMMENT
comment: "a comment"
- name: CONSTANT
@@ -393,8 +430,6 @@ tokens:
comment: "a line inside of embedded documentation"
- name: EMBEXPR_BEGIN
comment: "#{"
- - name: EMBEXPR_END
- comment: "}"
- name: EMBVAR
comment: "#"
- name: EQUAL
@@ -461,20 +496,12 @@ tokens:
comment: "def"
- name: KEYWORD_DEFINED
comment: "defined?"
- - name: KEYWORD_DO
- comment: "do"
+ - name: KEYWORD_DO_BLOCK
+ comment: "do keyword for a block attached to a command"
- name: KEYWORD_DO_LOOP
comment: "do keyword for a predicate in a while, until, or for loop"
- - name: KEYWORD_ELSE
- comment: "else"
- - name: KEYWORD_ELSIF
- comment: "elsif"
- - name: KEYWORD_END
- comment: "end"
- name: KEYWORD_END_UPCASE
comment: "END"
- - name: KEYWORD_ENSURE
- comment: "ensure"
- name: KEYWORD_FALSE
comment: "false"
- name: KEYWORD_FOR
@@ -483,8 +510,6 @@ tokens:
comment: "if"
- name: KEYWORD_IF_MODIFIER
comment: "if in the modifier form"
- - name: KEYWORD_IN
- comment: "in"
- name: KEYWORD_MODULE
comment: "module"
- name: KEYWORD_NEXT
@@ -497,8 +522,6 @@ tokens:
comment: "or"
- name: KEYWORD_REDO
comment: "redo"
- - name: KEYWORD_RESCUE
- comment: "rescue"
- name: KEYWORD_RESCUE_MODIFIER
comment: "rescue in the modifier form"
- name: KEYWORD_RETRY
@@ -509,8 +532,6 @@ tokens:
comment: "self"
- name: KEYWORD_SUPER
comment: "super"
- - name: KEYWORD_THEN
- comment: "then"
- name: KEYWORD_TRUE
comment: "true"
- name: KEYWORD_UNDEF
@@ -523,8 +544,6 @@ tokens:
comment: "until"
- name: KEYWORD_UNTIL_MODIFIER
comment: "until in the modifier form"
- - name: KEYWORD_WHEN
- comment: "when"
- name: KEYWORD_WHILE
comment: "while"
- name: KEYWORD_WHILE_MODIFIER
@@ -561,16 +580,12 @@ tokens:
comment: "-="
- name: MINUS_GREATER
comment: "->"
- - name: NEWLINE
- comment: "a newline character outside of other tokens"
- name: NUMBERED_REFERENCE
comment: "a numbered reference to a capture group in the previous regular expression match"
- name: PARENTHESIS_LEFT
comment: "("
- name: PARENTHESIS_LEFT_PARENTHESES
comment: "( for a parentheses node"
- - name: PARENTHESIS_RIGHT
- comment: ")"
- name: PERCENT
comment: "%"
- name: PERCENT_EQUAL
@@ -585,8 +600,6 @@ tokens:
comment: "%I"
- name: PERCENT_UPPER_W
comment: "%W"
- - name: PIPE
- comment: "|"
- name: PIPE_EQUAL
comment: "|="
- name: PIPE_PIPE
@@ -603,8 +616,6 @@ tokens:
comment: "the beginning of a regular expression"
- name: REGEXP_END
comment: "the end of a regular expression"
- - name: SEMICOLON
- comment: ";"
- name: SLASH
comment: "/"
- name: SLASH_EQUAL
@@ -803,8 +814,6 @@ nodes:
- GlobalVariableReadNode
- BackReferenceReadNode
- NumberedReferenceReadNode
- - on error: SymbolNode # alias $a b
- - on error: MissingNode # alias $a 42
comment: |
Represents the old name of the global variable that can be used before aliasing.
@@ -813,7 +822,7 @@ nodes:
- name: keyword_loc
type: location
comment: |
- The location of the `alias` keyword.
+ The Location of the `alias` keyword.
alias $foo $bar
^^^^^
@@ -845,8 +854,6 @@ nodes:
kind:
- SymbolNode
- InterpolatedSymbolNode
- - on error: GlobalVariableReadNode # alias a $b
- - on error: MissingNode # alias a 42
comment: |
Represents the old name of the method that will be aliased.
@@ -861,7 +868,7 @@ nodes:
- name: keyword_loc
type: location
comment: |
- Represents the location of the `alias` keyword.
+ Represents the Location of the `alias` keyword.
alias foo bar
^^^^^
@@ -891,7 +898,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- Represents the alternation operator location.
+ Represents the alternation operator Location.
foo => bar | baz
^
@@ -927,7 +934,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `and` keyword or the `&&` operator.
+ The Location of the `and` keyword or the `&&` operator.
left and right
^^^
@@ -962,7 +969,7 @@ nodes:
- name: opening_loc
type: location?
comment: |
- Represents the optional source location for the opening token.
+ Represents the optional source Location for the opening token.
[1,2,3] # "["
%w[foo bar baz] # "%w["
@@ -971,7 +978,7 @@ nodes:
- name: closing_loc
type: location?
comment: |
- Represents the optional source location for the closing token.
+ Represents the optional source Location for the closing token.
[1,2,3] # "]"
%w[foo bar baz] # "]"
@@ -987,8 +994,19 @@ nodes:
- name: constant
type: node?
kind:
- - ConstantReadNode
- ConstantPathNode
+ - ConstantReadNode
+ comment: |
+ Represents the optional constant preceding the Array
+
+ foo in Bar[]
+ ^^^
+
+ foo in Bar[1, 2, 3]
+ ^^^
+
+ foo in Bar::Baz[1, 2, 3]
+ ^^^^^^^^
- name: requireds
type: node[]
kind: pattern expression
@@ -999,7 +1017,9 @@ nodes:
^ ^
- name: rest
type: node?
- kind: pattern expression
+ kind:
+ - ImplicitRestNode
+ - SplatNode
comment: |
Represents the rest element of the array pattern.
@@ -1016,14 +1036,14 @@ nodes:
- name: opening_loc
type: location?
comment: |
- Represents the opening location of the array pattern.
+ Represents the opening Location of the array pattern.
foo in [1, 2]
^
- name: closing_loc
type: location?
comment: |
- Represents the closing location of the array pattern.
+ Represents the closing Location of the array pattern.
foo in [1, 2]
^
@@ -1031,19 +1051,19 @@ nodes:
Represents an array pattern in pattern matching.
foo in 1, 2
- ^^^^^^^^^^^
+ ^^^^
foo in [1, 2]
- ^^^^^^^^^^^^^
+ ^^^^^^
foo in *bar
- ^^^^^^^^^^^
+ ^^^^
foo in Bar[]
- ^^^^^^^^^^^^
+ ^^^^^
foo in Bar[1, 2, 3]
- ^^^^^^^^^^^^^^^^^^^
+ ^^^^^^^^^^^^
- name: AssocNode
fields:
- name: key
@@ -1074,7 +1094,7 @@ nodes:
- name: operator_loc
type: location?
comment: |
- The location of the `=>` operator, if present.
+ The Location of the `=>` operator, if present.
{ foo => bar }
^^
@@ -1096,7 +1116,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `**` operator.
+ The Location of the `**` operator.
{ **x }
^^
@@ -1125,7 +1145,7 @@ nodes:
- name: begin_keyword_loc
type: location?
comment: |
- Represents the location of the `begin` keyword.
+ Represents the Location of the `begin` keyword.
begin x end
^^^^^
@@ -1152,7 +1172,7 @@ nodes:
Represents the else clause within the begin block.
begin x; rescue y; else z; end
- ^^^^^^
+ ^^^^^^^^^^^
- name: ensure_clause
type: node?
kind: EnsureNode
@@ -1164,7 +1184,7 @@ nodes:
- name: end_keyword_loc
type: location?
comment: |
- Represents the location of the `end` keyword.
+ Represents the Location of the `end` keyword.
begin x end
^^^
@@ -1185,11 +1205,11 @@ nodes:
The expression that is being passed as a block argument. This can be any [non-void expression](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
foo(&args)
- ^^^^^
+ ^^^^
- name: operator_loc
type: location
comment: |
- Represents the location of the `&` operator.
+ Represents the Location of the `&` operator.
foo(&args)
^
@@ -1197,7 +1217,7 @@ nodes:
Represents a block argument using `&`.
bar(&args)
- ^^^^^^^^^^
+ ^^^^^
- name: BlockLocalVariableNode
flags: ParameterFlags
fields:
@@ -1250,17 +1270,17 @@ nodes:
- name: opening_loc
type: location
comment: |
- Represents the location of the opening `|`.
+ Represents the Location of the opening `{` or `do`.
[1, 2, 3].each { |i| puts x }
- ^
+ ^
- name: closing_loc
type: location
comment: |
- Represents the location of the closing `|`.
+ Represents the Location of the closing `}` or `end`.
[1, 2, 3].each { |i| puts x }
- ^
+ ^
comment: |
Represents a block of ruby code.
@@ -1280,14 +1300,14 @@ nodes:
- name: name_loc
type: location?
comment: |
- Represents the location of the block parameter name.
+ Represents the Location of the block parameter name.
def a(&b)
^
- name: operator_loc
type: location
comment: |
- Represents the location of the `&` operator.
+ Represents the Location of the `&` operator.
def a(&b)
^
@@ -1327,7 +1347,7 @@ nodes:
- name: opening_loc
type: location?
comment: |
- Represents the opening location of the block parameters.
+ Represents the opening Location of the block parameters.
-> (a, b = 1; local) { }
^
@@ -1338,7 +1358,7 @@ nodes:
- name: closing_loc
type: location?
comment: |
- Represents the closing location of the block parameters.
+ Represents the closing Location of the block parameters.
-> (a, b = 1; local) { }
^
@@ -1368,7 +1388,7 @@ nodes:
- name: keyword_loc
type: location
comment: |
- The location of the `break` keyword.
+ The Location of the `break` keyword.
break foo
^^^^^
@@ -1391,14 +1411,14 @@ nodes:
- name: call_operator_loc
type: location?
comment: |
- Represents the location of the call operator.
+ Represents the Location of the call operator.
foo.bar &&= value
^
- name: message_loc
type: location?
comment: |
- Represents the location of the message.
+ Represents the Location of the message.
foo.bar &&= value
^^^
@@ -1419,7 +1439,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- Represents the location of the operator.
+ Represents the Location of the operator.
foo.bar &&= value
^^^
@@ -1456,7 +1476,7 @@ nodes:
- name: call_operator_loc
type: location?
comment: |
- Represents the location of the call operator.
+ Represents the Location of the call operator.
foo.bar
^
@@ -1473,14 +1493,15 @@ nodes:
- name: message_loc
type: location?
comment: |
- Represents the location of the message.
+ Represents the Location of the message.
foo.bar
^^^
- name: opening_loc
type: location?
comment: |
- Represents the location of the left parenthesis.
+ Represents the Location of the left parenthesis.
+
foo(bar)
^
- name: arguments
@@ -1494,10 +1515,20 @@ nodes:
- name: closing_loc
type: location?
comment: |
- Represents the location of the right parenthesis.
+ Represents the Location of the right parenthesis.
foo(bar)
^
+ - name: equal_loc
+ type: location?
+ comment: |
+ Represents the Location of the equal sign, in the case that this is an attribute write.
+
+ foo.bar = value
+ ^
+
+ foo[bar] = value
+ ^
- name: block
type: node?
kind:
@@ -1542,14 +1573,14 @@ nodes:
- name: call_operator_loc
type: location?
comment: |
- Represents the location of the call operator.
+ Represents the Location of the call operator.
foo.bar += value
^
- name: message_loc
type: location?
comment: |
- Represents the location of the message.
+ Represents the Location of the message.
foo.bar += value
^^^
@@ -1577,7 +1608,7 @@ nodes:
- name: binary_operator_loc
type: location
comment: |
- Represents the location of the binary operator.
+ Represents the Location of the binary operator.
foo.bar += value
^^
@@ -1608,14 +1639,14 @@ nodes:
- name: call_operator_loc
type: location?
comment: |
- Represents the location of the call operator.
+ Represents the Location of the call operator.
foo.bar ||= value
^
- name: message_loc
type: location?
comment: |
- Represents the location of the message.
+ Represents the Location of the message.
foo.bar ||= value
^^^
@@ -1636,7 +1667,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- Represents the location of the operator.
+ Represents the Location of the operator.
foo.bar ||= value
^^^
@@ -1667,7 +1698,7 @@ nodes:
- name: call_operator_loc
type: location
comment: |
- Represents the location of the call operator.
+ Represents the Location of the call operator.
foo.bar = 1
^
@@ -1681,7 +1712,7 @@ nodes:
- name: message_loc
type: location
comment: |
- Represents the location of the message.
+ Represents the Location of the message.
foo.bar = 1
^^^
@@ -1719,7 +1750,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- Represents the location of the `=>` operator.
+ Represents the Location of the `=>` operator.
foo => bar
^^
@@ -1727,7 +1758,7 @@ nodes:
Represents assigning to a local variable in pattern matching.
foo => [bar => baz]
- ^^^^^^^^^^^^
+ ^^^^^^^^^^
- name: CaseMatchNode
fields:
- name: predicate
@@ -1737,7 +1768,7 @@ nodes:
Represents the predicate of the case match. This can be either `nil` or any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
case true; in false; end
- ^^^^
+ ^^^^
- name: conditions
type: node[]
kind: InNode
@@ -1753,18 +1784,18 @@ nodes:
Represents the else clause of the case match.
case true; in false; else; end
- ^^^^
+ ^^^^^^^^^
- name: case_keyword_loc
type: location
comment: |
- Represents the location of the `case` keyword.
+ Represents the Location of the `case` keyword.
case true; in false; end
^^^^
- name: end_keyword_loc
type: location
comment: |
- Represents the location of the `end` keyword.
+ Represents the Location of the `end` keyword.
case true; in false; end
^^^
@@ -1784,7 +1815,7 @@ nodes:
Represents the predicate of the case statement. This can be either `nil` or any [non-void expressions](https://github.com/ruby/prism/blob/main/docs/parsing_rules.md#non-void-expression).
case true; when false; end
- ^^^^
+ ^^^^
- name: conditions
type: node[]
kind: WhenNode
@@ -1800,18 +1831,18 @@ nodes:
Represents the else clause of the case statement.
case true; when false; else; end
- ^^^^
+ ^^^^^^^^^
- name: case_keyword_loc
type: location
comment: |
- Represents the location of the `case` keyword.
+ Represents the Location of the `case` keyword.
case true; when false; end
^^^^
- name: end_keyword_loc
type: location
comment: |
- Represents the location of the `end` keyword.
+ Represents the Location of the `end` keyword.
case true; when false; end
^^^
@@ -1828,26 +1859,54 @@ nodes:
type: constant[]
- name: class_keyword_loc
type: location
+ comment: |
+ Represents the Location of the `class` keyword.
+
+ class Foo end
+ ^^^^^
- name: constant_path
type: node
kind:
- ConstantReadNode
- ConstantPathNode
- - on error: CallNode # class 0.X end
- name: inheritance_operator_loc
type: location?
+ comment: |
+ Represents the Location of the `<` operator.
+
+ class Foo < Bar
+ ^
- name: superclass
type: node?
kind: non-void expression
+ comment: |
+ Represents the superclass of the class.
+
+ class Foo < Bar
+ ^^^
- name: body
type: node?
kind:
- StatementsNode
- BeginNode
+ comment: |
+ Represents the body of the class.
+
+ class Foo; bar; end
+ ^^^
- name: end_keyword_loc
type: location
+ comment: |
+ Represents the Location of the `end` keyword.
+
+ class Foo end
+ ^^^
- name: name
type: constant
+ comment: |
+ The name of the class.
+
+ class Foo end # name `:Foo`
comment: |
Represents a class declaration involving the `class` keyword.
@@ -1865,14 +1924,14 @@ nodes:
- name: name_loc
type: location
comment: |
- Represents the location of the variable name.
+ Represents the Location of the variable name.
@@target &&= value
^^^^^^^^
- name: operator_loc
type: location
comment: |
- Represents the location of the `&&=` operator.
+ Represents the Location of the `&&=` operator.
@@target &&= value
^^^
@@ -1960,7 +2019,7 @@ nodes:
- name: name_loc
type: location
comment: |
- The location of the variable name.
+ The Location of the variable name.
@@foo = :bar
^^^^^
@@ -1978,7 +2037,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `=` operator.
+ The Location of the `=` operator.
@@foo = :bar
^
@@ -2074,7 +2133,7 @@ nodes:
- name: delimiter_loc
type: location
comment: |
- The location of the `::` delimiter.
+ The Location of the `::` delimiter.
::Foo
^^
@@ -2084,7 +2143,7 @@ nodes:
- name: name_loc
type: location
comment: |
- The location of the name of the constant.
+ The Location of the name of the constant.
::Foo
^^^
@@ -2160,7 +2219,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `=` operator.
+ The Location of the `=` operator.
::ABC = 123
^
@@ -2220,7 +2279,7 @@ nodes:
- name: name_loc
type: location
comment: |
- The location of the constant name.
+ The Location of the constant name.
FOO = 1
^^^
@@ -2238,7 +2297,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `=` operator.
+ The Location of the `=` operator.
FOO = :bar
^
@@ -2363,6 +2422,15 @@ nodes:
^^^^^^
bar
end
+ - name: ErrorRecoveryNode
+ fields:
+ - name: unexpected
+ type: node?
+ kind: Node
+ comment: |
+ The unexpected node that was found in the tree, if there was one.
+ comment: |
+ Represents a node that is either missing or unexpected and results in a syntax error.
- name: FalseNode
comment: |
Represents the use of the literal `false` keyword.
@@ -2374,23 +2442,66 @@ nodes:
- name: constant
type: node?
kind:
- - ConstantReadNode
- ConstantPathNode
+ - ConstantReadNode
+ comment: |
+ Represents the optional constant preceding the pattern
+
+ foo in Foo(*bar, baz, *qux)
+ ^^^
- name: left
type: node
kind: SplatNode
+ comment: |
+ Represents the first wildcard node in the pattern.
+
+ foo in *bar, baz, *qux
+ ^^^^
+
+ foo in Foo(*bar, baz, *qux)
+ ^^^^
- name: requireds
type: node[]
kind: pattern expression
+ comment: |
+ Represents the nodes in between the wildcards.
+
+ foo in *bar, baz, *qux
+ ^^^
+
+ foo in Foo(*bar, baz, 1, *qux)
+ ^^^^^^
- name: right
type: node
- kind:
- - SplatNode
- - on error: MissingNode
+ kind: SplatNode
+ comment: |
+ Represents the second wildcard node in the pattern.
+
+ foo in *bar, baz, *qux
+ ^^^^
+
+ foo in Foo(*bar, baz, *qux)
+ ^^^^
- name: opening_loc
type: location?
+ comment: |
+ The Location of the opening brace.
+
+ foo in [*bar, baz, *qux]
+ ^
+
+ foo in Foo(*bar, baz, *qux)
+ ^
- name: closing_loc
type: location?
+ comment: |
+ The Location of the closing brace.
+
+ foo in [*bar, baz, *qux]
+ ^
+
+ foo in Foo(*bar, baz, *qux)
+ ^
comment: |
Represents a find pattern in pattern matching.
@@ -2402,6 +2513,9 @@ nodes:
foo in Foo(*bar, baz, *qux)
^^^^^^^^^^^^^^^^^^^^
+
+ foo => *bar, baz, *qux
+ ^^^^^^^^^^^^^^^
- name: FlipFlopNode
flags: RangeFlags
fields:
@@ -2442,9 +2556,6 @@ nodes:
- CallTargetNode
- IndexTargetNode
- MultiTargetNode
- - on error: BackReferenceReadNode # for $& in a end
- - on error: NumberedReferenceReadNode # for $1 in a end
- - on error: MissingNode # for in 1..10; end
comment: |
The index expression for `for` loops.
@@ -2471,28 +2582,28 @@ nodes:
- name: for_keyword_loc
type: location
comment: |
- The location of the `for` keyword.
+ The Location of the `for` keyword.
for i in a end
^^^
- name: in_keyword_loc
type: location
comment: |
- The location of the `in` keyword.
+ The Location of the `in` keyword.
for i in a end
^^
- name: do_keyword_loc
type: location?
comment: |
- The location of the `do` keyword, if present.
+ The Location of the `do` keyword, if present.
for i in a do end
^^
- name: end_keyword_loc
type: location
comment: |
- The location of the `end` keyword.
+ The Location of the `end` keyword.
for i in a end
^^^
@@ -2518,14 +2629,29 @@ nodes:
end
- name: ForwardingSuperNode
fields:
+ - name: keyword_loc
+ type: location
+ comment: |
+ super
+ ^^^^^
+
+ super { 123 }
+ ^^^^^
- name: block
type: node?
kind: BlockNode
+ comment: |
+ All other arguments are forwarded as normal, except the original block is replaced with the new block.
comment: |
- Represents the use of the `super` keyword without parentheses or arguments.
+ Represents the use of the `super` keyword without parentheses or arguments, but which might have a block.
super
^^^^^
+
+ super { 123 }
+ ^^^^^^^^^^^^^
+
+ If it has any other arguments, it would be a `SuperNode` instead.
- name: GlobalVariableAndWriteNode
fields:
- name: name
@@ -2613,7 +2739,7 @@ nodes:
- name: name_loc
type: location
comment: |
- The location of the global variable's name.
+ The Location of the global variable's name.
$foo = :bar
^^^^
@@ -2631,7 +2757,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `=` operator.
+ The Location of the `=` operator.
$foo = :bar
^
@@ -2645,7 +2771,7 @@ nodes:
- name: opening_loc
type: location
comment: |
- The location of the opening brace.
+ The Location of the opening brace.
{ a => b }
^
@@ -2665,7 +2791,7 @@ nodes:
- name: closing_loc
type: location
comment: |
- The location of the closing brace.
+ The Location of the closing brace.
{ a => b }
^
@@ -2679,20 +2805,60 @@ nodes:
- name: constant
type: node?
kind:
- - ConstantReadNode
- ConstantPathNode
+ - ConstantReadNode
+ comment: |
+ Represents the optional constant preceding the Hash.
+
+ foo => Bar[a: 1, b: 2]
+ ^^^
+
+ foo => Bar::Baz[a: 1, b: 2]
+ ^^^^^^^^
- name: elements
type: node[]
kind: AssocNode
+ comment: |
+ Represents the explicit named hash keys and values.
+
+ foo => { a: 1, b:, ** }
+ ^^^^^^^^
- name: rest
type: node?
kind:
- AssocSplatNode
- NoKeywordsParameterNode
+ comment: |
+ Represents the rest of the Hash keys and values. This can be named, unnamed, or explicitly forbidden via `**nil`; this last form results in a `NoKeywordsParameterNode`.
+
+ foo => { a: 1, b:, **c }
+ ^^^
+
+ foo => { a: 1, b:, ** }
+ ^^
+
+ foo => { a: 1, b:, **nil }
+ ^^^^^
- name: opening_loc
type: location?
+ comment: |
+ The Location of the opening brace.
+
+ foo => { a: 1 }
+ ^
+
+ foo => Bar[a: 1]
+ ^
- name: closing_loc
type: location?
+ comment: |
+ The Location of the closing brace.
+
+ foo => { a: 1 }
+ ^
+
+ foo => Bar[a: 1]
+ ^
comment: |
Represents a hash pattern in pattern matching.
@@ -2701,12 +2867,18 @@ nodes:
foo => { a: 1, b: 2, **c }
^^^^^^^^^^^^^^^^^^^
+
+ foo => Bar[a: 1, b: 2]
+ ^^^^^^^^^^^^^^^
+
+ foo in { a: 1, b: 2 }
+ ^^^^^^^^^^^^^^
- name: IfNode
fields:
- name: if_keyword_loc
type: location?
comment: |
- The location of the `if` keyword if present.
+ The Location of the `if` keyword if present.
bar if foo
^^
@@ -2731,7 +2903,7 @@ nodes:
- name: then_keyword_loc
type: location?
comment: |
- The location of the `then` keyword (if present) or the `?` in a ternary expression, `nil` otherwise.
+ The Location of the `then` keyword (if present) or the `?` in a ternary expression, `nil` otherwise.
if foo then bar end
^^^^
@@ -2772,7 +2944,7 @@ nodes:
- name: end_keyword_loc
type: location?
comment: |
- The location of the `end` keyword if present, `nil` otherwise.
+ The Location of the `end` keyword if present, `nil` otherwise.
if foo
bar
@@ -3057,7 +3229,7 @@ nodes:
- name: name_loc
type: location
comment: |
- The location of the variable name.
+ The Location of the variable name.
@_x = 1
^^^
@@ -3075,7 +3247,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `=` operator.
+ The Location of the `=` operator.
@x = y
^
@@ -3145,7 +3317,6 @@ nodes:
- EmbeddedStatementsNode
- EmbeddedVariableNode
- InterpolatedStringNode # `"a" "#{b}"`
- - on error: XStringNode # `<<`FOO` "bar"
- name: closing_loc
type: location?
newline: parts
@@ -3353,6 +3524,9 @@ nodes:
foo, bar = baz
^^^ ^^^
+
+ foo => baz
+ ^^^
- name: LocalVariableWriteNode
fields:
- name: name
@@ -3376,7 +3550,7 @@ nodes:
- name: name_loc
type: location
comment: |
- The location of the variable name.
+ The Location of the variable name.
foo = :bar
^^^
@@ -3398,7 +3572,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `=` operator.
+ The Location of the `=` operator.
x = :y
^
@@ -3443,11 +3617,65 @@ nodes:
- name: value
type: node
kind: non-void expression
+ comment: |
+ Represents the left-hand side of the operator.
+
+ foo => bar
+ ^^^
- name: pattern
type: node
kind: pattern expression
+ comment: |
+ Represents the right-hand side of the operator. The type of the node depends on the expression.
+
+ Anything that looks like a local variable name (including `_`) will result in a `LocalVariableTargetNode`.
+
+ foo => a # This is equivalent to writing `a = foo`
+ ^
+
+ Using an explicit `Array` or combining expressions with `,` will result in an `ArrayPatternNode`. This can be preceded by a constant.
+
+ foo => [a]
+ ^^^
+
+ foo => a, b
+ ^^^^
+
+ foo => Bar[a, b]
+ ^^^^^^^^^
+
+ If the array pattern contains at least two wildcard matches, a `FindPatternNode` is created instead.
+
+ foo => *, 1, *a
+ ^^^^^
+
+ Using an explicit `Hash` or a constant with square brackets and hash keys in the square brackets will result in a `HashPatternNode`.
+
+ foo => { a: 1, b: }
+
+ foo => Bar[a: 1, b:]
+
+ foo => Bar[**]
+
+ To use any variable that needs runtime evaluation, pinning is required. This results in a `PinnedVariableNode`.
+
+ foo => ^a
+ ^^
+
+ Similarly, any expression can be used with pinning. This results in a `PinnedExpressionNode`.
+
+ foo => ^(a + 1)
+
+ Anything else will result in the regular node for that expression, for example a `ConstantReadNode`.
+
+ foo => CONST
- name: operator_loc
type: location
+ comment: |
+ The Location of the operator.
+
+ foo => bar
+ ^^
comment: |
Represents the use of the `=>` operator.
@@ -3466,9 +3694,6 @@ nodes:
/(?<foo>bar)/ =~ baz
^^^^^^^^^^^^^^^^^^^^
- - name: MissingNode
- comment: |
- Represents a node that is missing from the source and results in a syntax error.
- name: ModuleNode
fields:
- name: locals
@@ -3480,7 +3705,6 @@ nodes:
kind:
- ConstantReadNode
- ConstantPathNode
- - on error: MissingNode # module Parent module end
- name: body
type: node?
kind:
@@ -3510,8 +3734,6 @@ nodes:
- IndexTargetNode
- MultiTargetNode
- RequiredParameterNode # def m((a,b)); end
- - on error: BackReferenceReadNode # a, (b, $&) = z
- - on error: NumberedReferenceReadNode # a, (b, $1) = z
comment: |
Represents the targets expressions before a splat node.
@@ -3555,8 +3777,6 @@ nodes:
- IndexTargetNode
- MultiTargetNode
- RequiredParameterNode # def m((*,b)); end
- - on error: BackReferenceReadNode # a, (*, $&) = z
- - on error: NumberedReferenceReadNode # a, (*, $1) = z
comment: |
Represents the targets expressions after a splat node.
@@ -3565,14 +3785,14 @@ nodes:
- name: lparen_loc
type: location?
comment: |
- The location of the opening parenthesis.
+ The Location of the opening parenthesis.
a, (b, c) = 1, 2, 3
^
- name: rparen_loc
type: location?
comment: |
- The location of the closing parenthesis.
+ The Location of the closing parenthesis.
a, (b, c) = 1, 2, 3
^
@@ -3600,8 +3820,6 @@ nodes:
- CallTargetNode
- IndexTargetNode
- MultiTargetNode
- - on error: BackReferenceReadNode # $&, = z
- - on error: NumberedReferenceReadNode # $1, = z
comment: |
Represents the targets expressions before a splat node.
@@ -3644,8 +3862,6 @@ nodes:
- CallTargetNode
- IndexTargetNode
- MultiTargetNode
- - on error: BackReferenceReadNode # *, $& = z
- - on error: NumberedReferenceReadNode # *, $1 = z
comment: |
Represents the targets expressions after a splat node.
@@ -3654,21 +3870,21 @@ nodes:
- name: lparen_loc
type: location?
comment: |
- The location of the opening parenthesis.
+ The Location of the opening parenthesis.
(a, b, c) = 1, 2, 3
^
- name: rparen_loc
type: location?
comment: |
- The location of the closing parenthesis.
+ The Location of the closing parenthesis.
(a, b, c) = 1, 2, 3
^
- name: operator_loc
type: location
comment: |
- The location of the operator.
+ The Location of the operator.
a, b, c = 1, 2, 3
^
@@ -3703,6 +3919,18 @@ nodes:
nil
^^^
+ - name: NoBlockParameterNode
+ fields:
+ - name: operator_loc
+ type: location
+ - name: keyword_loc
+ type: location
+ comment: |
+ Represents the use of `&nil` inside method arguments.
+
+ def a(&nil)
+ ^^^^
+ end
- name: NoKeywordsParameterNode
fields:
- name: operator_loc
@@ -3802,7 +4030,7 @@ nodes:
- name: operator_loc
type: location
comment: |
- The location of the `or` keyword or the `||` operator.
+ The Location of the `or` keyword or the `||` operator.
left or right
^^
@@ -3831,11 +4059,6 @@ nodes:
kind:
- RequiredParameterNode
- MultiTargetNode
- # On parsing error of `f(**kwargs, ...)` or `f(**nil, ...)`, the keyword_rest value is moved here:
- - on error: KeywordRestParameterNode
- - on error: NoKeywordsParameterNode
- # On parsing error of `f(..., ...)`, the first forwarding parameter is moved here:
- - on error: ForwardingParameterNode
- name: keywords
type: node[]
kind:
@@ -3849,7 +4072,9 @@ nodes:
- NoKeywordsParameterNode
- name: block
type: node?
- kind: BlockParameterNode
+ kind:
+ - BlockParameterNode
+ - NoBlockParameterNode
comment: |
Represents the list of parameters on a method, block, or lambda definition.
@@ -3877,12 +4102,32 @@ nodes:
- name: expression
type: node
kind: non-void expression
+ comment: |
+ The expression used in the pinned expression.
+
+ foo in ^(bar)
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ The Location of the `^` operator.
+
+ foo in ^(bar)
+ ^
- name: lparen_loc
type: location
+ comment: |
+ The Location of the opening parenthesis.
+
+ foo in ^(bar)
+ ^
- name: rparen_loc
type: location
+ comment: |
+ The Location of the closing parenthesis.
+
+ foo in ^(bar)
+ ^
comment: |
Represents the use of the `^` operator for pinning an expression in a pattern matching expression.
@@ -3900,9 +4145,18 @@ nodes:
- BackReferenceReadNode # foo in ^$&
- NumberedReferenceReadNode # foo in ^$1
- ItLocalVariableReadNode # proc { 1 in ^it }
- - on error: MissingNode # foo in ^Bar
+ comment: |
+ The variable used in the pinned expression.
+
+ foo in ^bar
+ ^^^
- name: operator_loc
type: location
+ comment: |
+ The Location of the `^` operator.
+
+ foo in ^bar
+ ^
comment: |
Represents the use of the `^` operator for pinning a variable in a pattern matching expression.
@@ -3973,11 +4227,11 @@ nodes:
1...foo
^^^
- If neither right-hand or left-hand side was included, this will be a MissingNode.
+ If neither the right-hand nor the left-hand side was included, this will be an ErrorRecoveryNode.
- name: operator_loc
type: location
comment: |
- The location of the `..` or `...` operator.
+ The Location of the `..` or `...` operator.
comment: |
Represents the use of the `..` or `...` operators.
@@ -4088,9 +4342,6 @@ nodes:
- ConstantPathTargetNode
- CallTargetNode
- IndexTargetNode
- - on error: BackReferenceReadNode # => begin; rescue => $&; end
- - on error: NumberedReferenceReadNode # => begin; rescue => $1; end
- - on error: MissingNode # begin; rescue =>; end
- name: then_keyword_loc
type: location?
- name: statements
@@ -4203,7 +4454,7 @@ nodes:
fields:
- name: filepath
type: string
- comment: Represents the file path being parsed. This corresponds directly to the `filepath` option given to the various `Prism::parse*` APIs.
+ comment: Represents the file path being parsed. This corresponds directly to the `filepath` option given to the various `Prism.parse*` APIs.
comment: |
Represents the use of the `__FILE__` keyword.
@@ -4268,6 +4519,7 @@ nodes:
- name: arguments
type: node?
kind: ArgumentsNode
+ comment: "Can only be `nil` when there are empty parentheses, like `super()`."
- name: rparen_loc
type: location?
- name: block
@@ -4283,6 +4535,8 @@ nodes:
super foo, bar
^^^^^^^^^^^^^^
+
+ If no arguments are provided (except for a block), it would be a `ForwardingSuperNode` instead.
- name: SymbolNode
flags: SymbolFlags
fields:
@@ -4327,7 +4581,7 @@ nodes:
- name: keyword_loc
type: location
comment: |
- The location of the `unless` keyword.
+ The Location of the `unless` keyword.
unless cond then bar end
^^^^^^
@@ -4348,7 +4602,7 @@ nodes:
- name: then_keyword_loc
type: location?
comment: |
- The location of the `then` keyword, if present.
+ The Location of the `then` keyword, if present.
unless cond then bar end
^^^^
@@ -4368,11 +4622,11 @@ nodes:
The else clause of the unless expression, if present.
unless cond then bar else baz end
- ^^^^^^^^
+ ^^^^^^^^^^^^
- name: end_keyword_loc
type: location?
comment: |
- The location of the `end` keyword, if present.
+ The Location of the `end` keyword, if present.
unless cond then bar end
^^^
diff --git a/prism/util/pm_constant_pool.c b/prism/constant_pool.c
index 38ea01a228..90201ebb8e 100644
--- a/prism/util/pm_constant_pool.c
+++ b/prism/constant_pool.c
@@ -1,4 +1,11 @@
-#include "prism/util/pm_constant_pool.h"
+#include "prism/internal/constant_pool.h"
+
+#include "prism/compiler/align.h"
+#include "prism/compiler/inline.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
+#include <stdbool.h>
/**
* Initialize a list of constant ids.
@@ -14,10 +21,9 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
* Initialize a list of constant ids with a given capacity.
*/
void
-pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
+pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity) {
if (capacity) {
- list->ids = xcalloc(capacity, sizeof(pm_constant_id_t));
- if (list->ids == NULL) abort();
+ list->ids = (pm_constant_id_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_id_t), PRISM_ALIGNOF(pm_constant_id_t));
} else {
list->ids = NULL;
}
@@ -27,19 +33,23 @@ pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity)
}
/**
- * Append a constant id to a list of constant ids. Returns false if any
- * potential reallocations fail.
+ * Append a constant id to a list of constant ids.
*/
-bool
-pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
+void
+pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id) {
if (list->size >= list->capacity) {
- list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
- list->ids = (pm_constant_id_t *) xrealloc(list->ids, sizeof(pm_constant_id_t) * list->capacity);
- if (list->ids == NULL) return false;
+ size_t new_capacity = list->capacity == 0 ? 8 : list->capacity * 2;
+ pm_constant_id_t *new_ids = (pm_constant_id_t *) pm_arena_alloc(arena, sizeof(pm_constant_id_t) * new_capacity, PRISM_ALIGNOF(pm_constant_id_t));
+
+ if (list->size > 0) {
+ memcpy(new_ids, list->ids, list->size * sizeof(pm_constant_id_t));
+ }
+
+ list->ids = new_ids;
+ list->capacity = new_capacity;
}
list->ids[list->size++] = id;
- return true;
}
/**
@@ -66,29 +76,66 @@ pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id) {
}
/**
- * Free the memory associated with a list of constant ids.
+ * A multiply-xorshift hash that processes input a word at a time. This is
+ * significantly faster than the byte-at-a-time djb2 hash for the short strings
+ * typical in Ruby source (~15 bytes average). Each word is mixed into the hash
+ * by XOR followed by multiplication by a large odd constant, which spreads
+ * entropy across all bits. A final xorshift fold produces the 32-bit result.
*/
-void
-pm_constant_id_list_free(pm_constant_id_list_t *list) {
- if (list->ids != NULL) {
- xfree(list->ids);
- }
-}
-
-/**
- * A relatively simple hash function (djb2) that is used to hash strings. We are
- * optimizing here for simplicity and speed.
- */
-static inline uint32_t
+static PRISM_INLINE uint32_t
pm_constant_pool_hash(const uint8_t *start, size_t length) {
- // This is a prime number used as the initial value for the hash function.
- uint32_t value = 5381;
+ // This constant is borrowed from wyhash. It is a 64-bit odd integer with
+ // roughly equal 0/1 bits, chosen for good avalanche behavior when used in
+ // multiply-xorshift sequences.
+ static const uint64_t secret = 0x517cc1b727220a95ULL;
+ uint64_t hash = (uint64_t) length;
+
+ if (length <= 8) {
+ // Short strings: read first and last 4 bytes (overlapping for len < 8).
+ // This covers the majority of Ruby identifiers with a single multiply.
+ if (length >= 4) {
+ uint32_t a, b;
+ memcpy(&a, start, 4);
+ memcpy(&b, start + length - 4, 4);
+ hash ^= (uint64_t) a | ((uint64_t) b << 32);
+ } else if (length > 0) {
+ hash ^= (uint64_t) start[0] | ((uint64_t) start[length >> 1] << 8) | ((uint64_t) start[length - 1] << 16);
+ }
+ hash *= secret;
+ } else if (length <= 16) {
+ // Medium strings: read first and last 8 bytes (overlapping).
+ // Two multiplies instead of the three the loop-based approach needs.
+ uint64_t word;
+ memcpy(&word, start, 8);
+ hash ^= word;
+ hash *= secret;
+ memcpy(&word, start + length - 8, 8);
+ hash ^= word;
+ hash *= secret;
+ } else {
+ const uint8_t *ptr = start;
+ size_t remaining = length;
+
+ while (remaining >= 8) {
+ uint64_t word;
+ memcpy(&word, ptr, 8);
+ hash ^= word;
+ hash *= secret;
+ ptr += 8;
+ remaining -= 8;
+ }
- for (size_t index = 0; index < length; index++) {
- value = ((value << 5) + value) + start[index];
+ if (remaining > 0) {
+ // Read the last 8 bytes (overlapping with already-processed data).
+ uint64_t word;
+ memcpy(&word, start + length - 8, 8);
+ hash ^= word;
+ hash *= secret;
+ }
}
- return value;
+ hash ^= hash >> 32;
+ return (uint32_t) hash;
}
/**
@@ -121,21 +168,15 @@ is_power_of_two(uint32_t size) {
/**
* Resize a constant pool to a given capacity.
*/
-static inline bool
-pm_constant_pool_resize(pm_constant_pool_t *pool) {
+static PRISM_INLINE void
+pm_constant_pool_resize(pm_arena_t *arena, pm_constant_pool_t *pool) {
assert(is_power_of_two(pool->capacity));
uint32_t next_capacity = pool->capacity * 2;
- if (next_capacity < pool->capacity) return false;
-
const uint32_t mask = next_capacity - 1;
- const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
-
- void *next = xcalloc(next_capacity, element_size);
- if (next == NULL) return false;
- pm_constant_pool_bucket_t *next_buckets = next;
- pm_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(pm_constant_pool_bucket_t));
+ pm_constant_pool_bucket_t *next_buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, next_capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
+ pm_constant_t *next_constants = (pm_constant_t *) pm_arena_alloc(arena, next_capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
// For each bucket in the current constant pool, find the index in the
// next constant pool, and insert it.
@@ -163,33 +204,22 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
// The constants are stable with respect to hash table resizes.
memcpy(next_constants, pool->constants, pool->size * sizeof(pm_constant_t));
- // pool->constants and pool->buckets are allocated out of the same chunk
- // of memory, with the buckets coming first.
- xfree(pool->buckets);
pool->constants = next_constants;
pool->buckets = next_buckets;
pool->capacity = next_capacity;
- return true;
}
/**
* Initialize a new constant pool with a given capacity.
*/
-bool
-pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
- const uint32_t maximum = (~((uint32_t) 0));
- if (capacity >= ((maximum / 2) + 1)) return false;
-
+void
+pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity) {
capacity = next_power_of_two(capacity);
- const size_t element_size = sizeof(pm_constant_pool_bucket_t) + sizeof(pm_constant_t);
- void *memory = xcalloc(capacity, element_size);
- if (memory == NULL) return false;
- pool->buckets = memory;
- pool->constants = (void *)(((char *)memory) + capacity * sizeof(pm_constant_pool_bucket_t));
+ pool->buckets = (pm_constant_pool_bucket_t *) pm_arena_zalloc(arena, capacity * sizeof(pm_constant_pool_bucket_t), PRISM_ALIGNOF(pm_constant_pool_bucket_t));
+ pool->constants = (pm_constant_t *) pm_arena_alloc(arena, capacity * sizeof(pm_constant_t), PRISM_ALIGNOF(pm_constant_t));
pool->size = 0;
pool->capacity = capacity;
- return true;
}
/**
@@ -215,8 +245,7 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size
pm_constant_pool_bucket_t *bucket;
while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
- pm_constant_t *constant = &pool->constants[bucket->id - 1];
- if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+ if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
return bucket->id;
}
@@ -229,10 +258,10 @@ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size
/**
* Insert a constant into a constant pool and return its index in the pool.
*/
-static inline pm_constant_id_t
-pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
+static PRISM_INLINE pm_constant_id_t
+pm_constant_pool_insert(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
if (pool->size >= (pool->capacity / 4 * 3)) {
- if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
+ pm_constant_pool_resize(arena, pool);
}
assert(is_power_of_two(pool->capacity));
@@ -246,25 +275,17 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
// If there is a collision, then we need to check if the content is the
// same as the content we are trying to insert. If it is, then we can
// return the id of the existing constant.
- pm_constant_t *constant = &pool->constants[bucket->id - 1];
-
- if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+ if ((bucket->length == length) && memcmp(bucket->start, start, length) == 0) {
// Since we have found a match, we need to check if this is
// attempting to insert a shared or an owned constant. We want to
// prefer shared constants since they don't require allocations.
- if (type == PM_CONSTANT_POOL_BUCKET_OWNED) {
- // If we're attempting to insert an owned constant and we have
- // an existing constant, then either way we don't want the given
- // memory. Either it's duplicated with the existing constant or
- // it's not necessary because we have a shared version.
- xfree((void *) start);
- } else if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
+ if (type != PM_CONSTANT_POOL_BUCKET_OWNED && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
// If we're attempting to insert a shared constant and the
- // existing constant is owned, then we can free the owned
- // constant and replace it with the shared constant.
- xfree((void *) constant->start);
- constant->start = start;
- bucket->type = (unsigned int) (PM_CONSTANT_POOL_BUCKET_DEFAULT & 0x3);
+ // existing constant is owned, then we can replace it with the
+ // shared constant to prefer non-owned references.
+ bucket->start = start;
+ bucket->type = (unsigned int) (type & 0x3);
+ pool->constants[bucket->id - 1].start = start;
}
return bucket->id;
@@ -281,7 +302,9 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
*bucket = (pm_constant_pool_bucket_t) {
.id = (unsigned int) (id & 0x3fffffff),
.type = (unsigned int) (type & 0x3),
- .hash = hash
+ .hash = hash,
+ .start = start,
+ .length = length
};
pool->constants[id - 1] = (pm_constant_t) {
@@ -297,8 +320,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
* PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
*/
pm_constant_id_t
-pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
- return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
+pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+ return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_DEFAULT);
}
/**
@@ -307,8 +330,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
* potential calls to resize fail.
*/
pm_constant_id_t
-pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length) {
- return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
+pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length) {
+ return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_OWNED);
}
/**
@@ -317,26 +340,21 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t l
* resize fail.
*/
pm_constant_id_t
-pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
- return pm_constant_pool_insert(pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
+pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+ return pm_constant_pool_insert(arena, pool, start, length, PM_CONSTANT_POOL_BUCKET_CONSTANT);
}
/**
- * Free the memory associated with a constant pool.
+ * Return a raw pointer to the start of a constant.
*/
-void
-pm_constant_pool_free(pm_constant_pool_t *pool) {
- // For each constant in the current constant pool, free the contents if the
- // contents are owned.
- for (uint32_t index = 0; index < pool->capacity; index++) {
- pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
-
- // If an id is set on this constant, then we know we have content here.
- if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
- pm_constant_t *constant = &pool->constants[bucket->id - 1];
- xfree((void *) constant->start);
- }
- }
+const uint8_t *
+pm_constant_start(const pm_constant_t *constant) {
+ return constant->start;
+}
- xfree(pool->buckets);
+/**
+ * Return the length of a constant.
+ */
+size_t pm_constant_length(const pm_constant_t *constant) {
+ return constant->length;
}
diff --git a/prism/constant_pool.h b/prism/constant_pool.h
new file mode 100644
index 0000000000..dc03235c70
--- /dev/null
+++ b/prism/constant_pool.h
@@ -0,0 +1,81 @@
+/**
+ * @file constant_pool.h
+ *
+ * A data structure that stores a set of strings.
+ *
+ * Each string is assigned a unique id, which can be used to compare strings for
+ * equality. This comparison ends up being much faster than strcmp, since it
+ * only requires a single integer comparison.
+ */
+#ifndef PRISM_CONSTANT_POOL_H
+#define PRISM_CONSTANT_POOL_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/arena.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A constant id is a unique identifier for a constant in the constant pool.
+ */
+typedef uint32_t pm_constant_id_t;
+
+/**
+ * A list of constant IDs. Usually used to represent a set of locals.
+ */
+typedef struct {
+ /** The number of constant ids in the list. */
+ size_t size;
+
+ /** The number of constant ids that have been allocated in the list. */
+ size_t capacity;
+
+ /** The constant ids in the list. */
+ pm_constant_id_t *ids;
+} pm_constant_id_list_t;
+
+/** A constant in the pool which effectively stores a string. */
+typedef struct pm_constant_t pm_constant_t;
+
+/**
+ * The overall constant pool, which stores constants found while parsing.
+ */
+typedef struct pm_constant_pool_t pm_constant_pool_t;
+
+/**
+ * Return a raw pointer to the start of a constant.
+ *
+ * @param constant The constant to get the start of.
+ * @returns A raw pointer to the start of the constant.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_constant_start(const pm_constant_t *constant) PRISM_NONNULL(1);
+
+/**
+ * Return the length of a constant.
+ *
+ * @param constant The constant to get the length of.
+ * @returns The length of the constant.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_constant_length(const pm_constant_t *constant) PRISM_NONNULL(1);
+
+/**
+ * Initialize a list of constant ids.
+ *
+ * @param list The list to initialize.
+ */
+PRISM_EXPORTED_FUNCTION void pm_constant_id_list_init(pm_constant_id_list_t *list) PRISM_NONNULL(1);
+
+/**
+ * Append a constant id to a list of constant ids.
+ *
+ * @param arena The arena to use for allocations.
+ * @param list The list to append to.
+ * @param id The constant id to append.
+ */
+PRISM_EXPORTED_FUNCTION void pm_constant_id_list_append(pm_arena_t *arena, pm_constant_id_list_t *list, pm_constant_id_t id) PRISM_NONNULL(1, 2);
+
+#endif
diff --git a/prism/defines.h b/prism/defines.h
deleted file mode 100644
index e31429c789..0000000000
--- a/prism/defines.h
+++ /dev/null
@@ -1,260 +0,0 @@
-/**
- * @file defines.h
- *
- * Macro definitions used throughout the prism library.
- *
- * This file should be included first by any *.h or *.c in prism for consistency
- * and to ensure that the macros are defined before they are used.
- */
-#ifndef PRISM_DEFINES_H
-#define PRISM_DEFINES_H
-
-#include <ctype.h>
-#include <limits.h>
-#include <math.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-/**
- * We want to be able to use the PRI* macros for printing out integers, but on
- * some platforms they aren't included unless this is already defined.
- */
-#define __STDC_FORMAT_MACROS
-// Include sys/types.h before inttypes.h to work around issue with
-// certain versions of GCC and newlib which causes omission of PRIx64
-#include <sys/types.h>
-#include <inttypes.h>
-
-/**
- * When we are parsing using recursive descent, we want to protect against
- * malicious payloads that could attempt to crash our parser. We do this by
- * specifying a maximum depth to which we are allowed to recurse.
- */
-#ifndef PRISM_DEPTH_MAXIMUM
- #define PRISM_DEPTH_MAXIMUM 10000
-#endif
-
-/**
- * By default, we compile with -fvisibility=hidden. When this is enabled, we
- * need to mark certain functions as being publically-visible. This macro does
- * that in a compiler-agnostic way.
- */
-#ifndef PRISM_EXPORTED_FUNCTION
-# ifdef PRISM_EXPORT_SYMBOLS
-# ifdef _WIN32
-# define PRISM_EXPORTED_FUNCTION __declspec(dllexport) extern
-# else
-# define PRISM_EXPORTED_FUNCTION __attribute__((__visibility__("default"))) extern
-# endif
-# else
-# define PRISM_EXPORTED_FUNCTION
-# endif
-#endif
-
-/**
- * Certain compilers support specifying that a function accepts variadic
- * parameters that look like printf format strings to provide a better developer
- * experience when someone is using the function. This macro does that in a
- * compiler-agnostic way.
- */
-#if defined(__GNUC__)
-# if defined(__MINGW_PRINTF_FORMAT)
-# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index)))
-# else
-# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
-# endif
-#elif defined(__clang__)
-# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index)))
-#else
-# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
-#endif
-
-/**
- * GCC will warn if you specify a function or parameter that is unused at
- * runtime. This macro allows you to mark a function or parameter as unused in a
- * compiler-agnostic way.
- */
-#if defined(__GNUC__)
-# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-# define PRISM_ATTRIBUTE_UNUSED
-#endif
-
-/**
- * Old Visual Studio versions do not support the inline keyword, so we need to
- * define it to be __inline.
- */
-#if defined(_MSC_VER) && !defined(inline)
-# define inline __inline
-#endif
-
-/**
- * Old Visual Studio versions before 2015 do not implement sprintf, but instead
- * implement _snprintf. We standard that here.
- */
-#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
-# define snprintf _snprintf
-#endif
-
-/**
- * A simple utility macro to concatenate two tokens together, necessary when one
- * of the tokens is itself a macro.
- */
-#define PM_CONCATENATE(left, right) left ## right
-
-/**
- * We want to be able to use static assertions, but they weren't standardized
- * until C11. As such, we polyfill it here by making a hacky typedef that will
- * fail to compile due to a negative array size if the condition is false.
- */
-#if defined(_Static_assert)
-# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
-#else
-# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
-#endif
-
-/**
- * In general, libc for embedded systems does not support memory-mapped files.
- * If the target platform is POSIX or Windows, we can map a file in memory and
- * read it in a more efficient manner.
- */
-#ifdef _WIN32
-# define PRISM_HAS_MMAP
-#else
-# include <unistd.h>
-# ifdef _POSIX_MAPPED_FILES
-# define PRISM_HAS_MMAP
-# endif
-#endif
-
-/**
- * If PRISM_HAS_NO_FILESYSTEM is defined, then we want to exclude all filesystem
- * related code from the library. All filesystem related code should be guarded
- * by PRISM_HAS_FILESYSTEM.
- */
-#ifndef PRISM_HAS_NO_FILESYSTEM
-# define PRISM_HAS_FILESYSTEM
-#endif
-
-/**
- * isinf on POSIX systems it accepts a float, a double, or a long double.
- * But mingw didn't provide an isinf macro, only an isinf function that only
- * accepts floats, so we need to use _finite instead.
- */
-#ifdef __MINGW64__
- #include <float.h>
- #define PRISM_ISINF(x) (!_finite(x))
-#else
- #define PRISM_ISINF(x) isinf(x)
-#endif
-
-/**
- * If you build prism with a custom allocator, configure it with
- * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
- * xrealloc, xcalloc, and xfree.
- *
- * For example, your `prism_xallocator.h` file could look like this:
- *
- * ```
- * #ifndef PRISM_XALLOCATOR_H
- * #define PRISM_XALLOCATOR_H
- * #define xmalloc my_malloc
- * #define xrealloc my_realloc
- * #define xcalloc my_calloc
- * #define xfree my_free
- * #endif
- * ```
- */
-#ifdef PRISM_XALLOCATOR
- #include "prism_xallocator.h"
-#else
- #ifndef xmalloc
- /**
- * The malloc function that should be used. This can be overridden with
- * the PRISM_XALLOCATOR define.
- */
- #define xmalloc malloc
- #endif
-
- #ifndef xrealloc
- /**
- * The realloc function that should be used. This can be overridden with
- * the PRISM_XALLOCATOR define.
- */
- #define xrealloc realloc
- #endif
-
- #ifndef xcalloc
- /**
- * The calloc function that should be used. This can be overridden with
- * the PRISM_XALLOCATOR define.
- */
- #define xcalloc calloc
- #endif
-
- #ifndef xfree
- /**
- * The free function that should be used. This can be overridden with the
- * PRISM_XALLOCATOR define.
- */
- #define xfree free
- #endif
-#endif
-
-/**
- * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
- * switch that will turn off certain features of prism.
- */
-#ifdef PRISM_BUILD_MINIMAL
- /** Exclude the serialization API. */
- #define PRISM_EXCLUDE_SERIALIZATION
-
- /** Exclude the JSON serialization API. */
- #define PRISM_EXCLUDE_JSON
-
- /** Exclude the Array#pack parser API. */
- #define PRISM_EXCLUDE_PACK
-
- /** Exclude the prettyprint API. */
- #define PRISM_EXCLUDE_PRETTYPRINT
-
- /** Exclude the full set of encodings, using the minimal only. */
- #define PRISM_ENCODING_EXCLUDE_FULL
-#endif
-
-/**
- * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
- * branch predication.
- */
-#if defined(__GNUC__) || defined(__clang__)
- /** The compiler should predicate that this branch will be taken. */
- #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
-
- /** The compiler should predicate that this branch will not be taken. */
- #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
-#else
- /** Void because this platform does not support branch prediction hints. */
- #define PRISM_LIKELY(x) (x)
-
- /** Void because this platform does not support branch prediction hints. */
- #define PRISM_UNLIKELY(x) (x)
-#endif
-
-/**
- * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
- * Use PRISM_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional.
- */
-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later
- #define PRISM_FALLTHROUGH [[fallthrough]];
-#elif defined(__GNUC__) || defined(__clang__)
- #define PRISM_FALLTHROUGH __attribute__((fallthrough));
-#elif defined(_MSC_VER)
- #define PRISM_FALLTHROUGH __fallthrough;
-#else
- #define PRISM_FALLTHROUGH
-#endif
-
-#endif
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
new file mode 100644
index 0000000000..370061ec56
--- /dev/null
+++ b/prism/diagnostic.h
@@ -0,0 +1,93 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
+#ifndef PRISM_DIAGNOSTIC_H
+#define PRISM_DIAGNOSTIC_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+
+/**
+ * An opaque pointer to a diagnostic generated during parsing.
+ */
+typedef struct pm_diagnostic_t pm_diagnostic_t;
+
+/**
+ * The levels of errors generated during parsing.
+ */
+typedef enum {
+ /** For errors that should raise a syntax error. */
+ PM_ERROR_LEVEL_SYNTAX = 0,
+
+ /** For errors that should raise an argument error. */
+ PM_ERROR_LEVEL_ARGUMENT = 1,
+
+ /** For errors that should raise a load error. */
+ PM_ERROR_LEVEL_LOAD = 2
+} pm_error_level_t;
+
+/**
+ * The levels of warnings generated during parsing.
+ */
+typedef enum {
+ /** For warnings which should be emitted if $VERBOSE != nil. */
+ PM_WARNING_LEVEL_DEFAULT = 0,
+
+ /** For warnings which should be emitted if $VERBOSE == true. */
+ PM_WARNING_LEVEL_VERBOSE = 1
+} pm_warning_level_t;
+
+/**
+ * Get the type of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the type of.
+ * @returns The type of the given diagnostic. Note that this is a string
+ * representation of an internal ID, and is not meant to be relied upon as a
+ * stable identifier for the diagnostic. We do not guarantee that these will
+ * not change in the future. This is meant to be used for debugging and
+ * error reporting purposes, and not for programmatic checks.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_type(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the location of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the location of.
+ * @returns The location of the given diagnostic.
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_diagnostic_location(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the message of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the message of.
+ * @returns The message of the given diagnostic.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_diagnostic_message(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the error level associated with the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the error level of.
+ * @returns The error level of the given diagnostic. If the diagnostic was a
+ * warning, or is in any way not an error, then the return value is
+ * undefined and should not be relied upon.
+ */
+PRISM_EXPORTED_FUNCTION pm_error_level_t pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+/**
+ * Get the warning level associated with the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to get the warning level of.
+ * @returns The warning level of the given diagnostic. If the diagnostic was an
+ * error, or is in any way not a warning, then the return value is
+ * undefined and should not be relied upon.
+ */
+PRISM_EXPORTED_FUNCTION pm_warning_level_t pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/encoding.c b/prism/encoding.c
index a4aeed104f..c9c2e13056 100644
--- a/prism/encoding.c
+++ b/prism/encoding.c
@@ -1,8 +1,13 @@
-#include "prism/encoding.h"
+#include "prism/internal/encoding.h"
+
+#include "prism/compiler/unused.h"
+#include "prism/internal/strncasecmp.h"
+
+#include <assert.h>
typedef uint32_t pm_unicode_codepoint_t;
-#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
+#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1508
static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
@@ -10,7 +15,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x2EC, 0x2EC,
0x2EE, 0x2EE,
0x345, 0x345,
- 0x370, 0x374,
+ 0x363, 0x374,
0x376, 0x377,
0x37A, 0x37D,
0x37F, 0x37F,
@@ -50,7 +55,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x840, 0x858,
0x860, 0x86A,
0x870, 0x887,
- 0x889, 0x88E,
+ 0x889, 0x88F,
+ 0x897, 0x897,
0x8A0, 0x8C9,
0x8D4, 0x8DF,
0x8E3, 0x8E9,
@@ -140,7 +146,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0xC4A, 0xC4C,
0xC55, 0xC56,
0xC58, 0xC5A,
- 0xC5D, 0xC5D,
+ 0xC5C, 0xC5D,
0xC60, 0xC63,
0xC80, 0xC83,
0xC85, 0xC8C,
@@ -152,7 +158,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0xCC6, 0xCC8,
0xCCA, 0xCCC,
0xCD5, 0xCD6,
- 0xCDD, 0xCDE,
+ 0xCDC, 0xCDE,
0xCE0, 0xCE3,
0xCF1, 0xCF3,
0xD00, 0xD0C,
@@ -264,7 +270,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1C00, 0x1C36,
0x1C4D, 0x1C4F,
0x1C5A, 0x1C7D,
- 0x1C80, 0x1C88,
+ 0x1C80, 0x1C8A,
0x1C90, 0x1CBA,
0x1CBD, 0x1CBF,
0x1CE9, 0x1CEC,
@@ -272,7 +278,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1CF5, 0x1CF6,
0x1CFA, 0x1CFA,
0x1D00, 0x1DBF,
- 0x1DE7, 0x1DF4,
+ 0x1DD3, 0x1DF4,
0x1E00, 0x1F15,
0x1F18, 0x1F1D,
0x1F20, 0x1F45,
@@ -352,11 +358,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0xA67F, 0xA6EF,
0xA717, 0xA71F,
0xA722, 0xA788,
- 0xA78B, 0xA7CA,
- 0xA7D0, 0xA7D1,
- 0xA7D3, 0xA7D3,
- 0xA7D5, 0xA7D9,
- 0xA7F2, 0xA805,
+ 0xA78B, 0xA7DC,
+ 0xA7F1, 0xA805,
0xA807, 0xA827,
0xA840, 0xA873,
0xA880, 0xA8C3,
@@ -446,6 +449,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x105A3, 0x105B1,
0x105B3, 0x105B9,
0x105BB, 0x105BC,
+ 0x105C0, 0x105F3,
0x10600, 0x10736,
0x10740, 0x10755,
0x10760, 0x10767,
@@ -464,6 +468,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x108F4, 0x108F5,
0x10900, 0x10915,
0x10920, 0x10939,
+ 0x10940, 0x10959,
0x10980, 0x109B7,
0x109BE, 0x109BF,
0x10A00, 0x10A03,
@@ -483,9 +488,14 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x10C80, 0x10CB2,
0x10CC0, 0x10CF2,
0x10D00, 0x10D27,
+ 0x10D4A, 0x10D65,
+ 0x10D69, 0x10D69,
+ 0x10D6F, 0x10D85,
0x10E80, 0x10EA9,
0x10EAB, 0x10EAC,
0x10EB0, 0x10EB1,
+ 0x10EC2, 0x10EC7,
+ 0x10EFA, 0x10EFC,
0x10F00, 0x10F1C,
0x10F27, 0x10F27,
0x10F30, 0x10F45,
@@ -529,6 +539,17 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x11350, 0x11350,
0x11357, 0x11357,
0x1135D, 0x11363,
+ 0x11380, 0x11389,
+ 0x1138B, 0x1138B,
+ 0x1138E, 0x1138E,
+ 0x11390, 0x113B5,
+ 0x113B7, 0x113C0,
+ 0x113C2, 0x113C2,
+ 0x113C5, 0x113C5,
+ 0x113C7, 0x113CA,
+ 0x113CC, 0x113CD,
+ 0x113D1, 0x113D1,
+ 0x113D3, 0x113D3,
0x11400, 0x11441,
0x11443, 0x11445,
0x11447, 0x1144A,
@@ -567,6 +588,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x11A50, 0x11A97,
0x11A9D, 0x11A9D,
0x11AB0, 0x11AF8,
+ 0x11B60, 0x11B67,
+ 0x11BC0, 0x11BE0,
0x11C00, 0x11C08,
0x11C0A, 0x11C36,
0x11C38, 0x11C3E,
@@ -588,6 +611,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x11D90, 0x11D91,
0x11D93, 0x11D96,
0x11D98, 0x11D98,
+ 0x11DB0, 0x11DDB,
0x11EE0, 0x11EF6,
0x11F00, 0x11F10,
0x11F12, 0x11F3A,
@@ -599,7 +623,9 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x12F90, 0x12FF0,
0x13000, 0x1342F,
0x13441, 0x13446,
+ 0x13460, 0x143FA,
0x14400, 0x14646,
+ 0x16100, 0x1612E,
0x16800, 0x16A38,
0x16A40, 0x16A5E,
0x16A70, 0x16ABE,
@@ -608,16 +634,19 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x16B40, 0x16B43,
0x16B63, 0x16B77,
0x16B7D, 0x16B8F,
+ 0x16D40, 0x16D6C,
0x16E40, 0x16E7F,
+ 0x16EA0, 0x16EB8,
+ 0x16EBB, 0x16ED3,
0x16F00, 0x16F4A,
0x16F4F, 0x16F87,
0x16F8F, 0x16F9F,
0x16FE0, 0x16FE1,
0x16FE3, 0x16FE3,
- 0x16FF0, 0x16FF1,
- 0x17000, 0x187F7,
- 0x18800, 0x18CD5,
- 0x18D00, 0x18D08,
+ 0x16FF0, 0x16FF6,
+ 0x17000, 0x18CD5,
+ 0x18CFF, 0x18D1E,
+ 0x18D80, 0x18DF2,
0x1AFF0, 0x1AFF3,
0x1AFF5, 0x1AFFB,
0x1AFFD, 0x1AFFE,
@@ -677,6 +706,11 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1E290, 0x1E2AD,
0x1E2C0, 0x1E2EB,
0x1E4D0, 0x1E4EB,
+ 0x1E5D0, 0x1E5ED,
+ 0x1E5F0, 0x1E5F0,
+ 0x1E6C0, 0x1E6DE,
+ 0x1E6E0, 0x1E6F5,
+ 0x1E6FE, 0x1E6FF,
0x1E7E0, 0x1E7E6,
0x1E7E8, 0x1E7EB,
0x1E7ED, 0x1E7EE,
@@ -722,16 +756,16 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
0x1F150, 0x1F169,
0x1F170, 0x1F189,
0x20000, 0x2A6DF,
- 0x2A700, 0x2B739,
- 0x2B740, 0x2B81D,
- 0x2B820, 0x2CEA1,
+ 0x2A700, 0x2B81D,
+ 0x2B820, 0x2CEAD,
0x2CEB0, 0x2EBE0,
+ 0x2EBF0, 0x2EE5D,
0x2F800, 0x2FA1D,
0x30000, 0x3134A,
- 0x31350, 0x323AF,
+ 0x31350, 0x33479,
};
-#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
+#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1598
static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
0x100, 0x2C1,
0x2C6, 0x2D1,
@@ -739,7 +773,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x2EC, 0x2EC,
0x2EE, 0x2EE,
0x345, 0x345,
- 0x370, 0x374,
+ 0x363, 0x374,
0x376, 0x377,
0x37A, 0x37D,
0x37F, 0x37F,
@@ -778,7 +812,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x840, 0x858,
0x860, 0x86A,
0x870, 0x887,
- 0x889, 0x88E,
+ 0x889, 0x88F,
+ 0x897, 0x897,
0x8A0, 0x8C9,
0x8D4, 0x8DF,
0x8E3, 0x8E9,
@@ -872,7 +907,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0xC4A, 0xC4C,
0xC55, 0xC56,
0xC58, 0xC5A,
- 0xC5D, 0xC5D,
+ 0xC5C, 0xC5D,
0xC60, 0xC63,
0xC66, 0xC6F,
0xC80, 0xC83,
@@ -885,7 +920,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0xCC6, 0xCC8,
0xCCA, 0xCCC,
0xCD5, 0xCD6,
- 0xCDD, 0xCDE,
+ 0xCDC, 0xCDE,
0xCE0, 0xCE3,
0xCE6, 0xCEF,
0xCF1, 0xCF3,
@@ -1007,7 +1042,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1C00, 0x1C36,
0x1C40, 0x1C49,
0x1C4D, 0x1C7D,
- 0x1C80, 0x1C88,
+ 0x1C80, 0x1C8A,
0x1C90, 0x1CBA,
0x1CBD, 0x1CBF,
0x1CE9, 0x1CEC,
@@ -1015,7 +1050,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1CF5, 0x1CF6,
0x1CFA, 0x1CFA,
0x1D00, 0x1DBF,
- 0x1DE7, 0x1DF4,
+ 0x1DD3, 0x1DF4,
0x1E00, 0x1F15,
0x1F18, 0x1F1D,
0x1F20, 0x1F45,
@@ -1094,11 +1129,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0xA67F, 0xA6EF,
0xA717, 0xA71F,
0xA722, 0xA788,
- 0xA78B, 0xA7CA,
- 0xA7D0, 0xA7D1,
- 0xA7D3, 0xA7D3,
- 0xA7D5, 0xA7D9,
- 0xA7F2, 0xA805,
+ 0xA78B, 0xA7DC,
+ 0xA7F1, 0xA805,
0xA807, 0xA827,
0xA840, 0xA873,
0xA880, 0xA8C3,
@@ -1191,6 +1223,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x105A3, 0x105B1,
0x105B3, 0x105B9,
0x105BB, 0x105BC,
+ 0x105C0, 0x105F3,
0x10600, 0x10736,
0x10740, 0x10755,
0x10760, 0x10767,
@@ -1209,6 +1242,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x108F4, 0x108F5,
0x10900, 0x10915,
0x10920, 0x10939,
+ 0x10940, 0x10959,
0x10980, 0x109B7,
0x109BE, 0x109BF,
0x10A00, 0x10A03,
@@ -1229,9 +1263,14 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x10CC0, 0x10CF2,
0x10D00, 0x10D27,
0x10D30, 0x10D39,
+ 0x10D40, 0x10D65,
+ 0x10D69, 0x10D69,
+ 0x10D6F, 0x10D85,
0x10E80, 0x10EA9,
0x10EAB, 0x10EAC,
0x10EB0, 0x10EB1,
+ 0x10EC2, 0x10EC7,
+ 0x10EFA, 0x10EFC,
0x10F00, 0x10F1C,
0x10F27, 0x10F27,
0x10F30, 0x10F45,
@@ -1278,6 +1317,17 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11350, 0x11350,
0x11357, 0x11357,
0x1135D, 0x11363,
+ 0x11380, 0x11389,
+ 0x1138B, 0x1138B,
+ 0x1138E, 0x1138E,
+ 0x11390, 0x113B5,
+ 0x113B7, 0x113C0,
+ 0x113C2, 0x113C2,
+ 0x113C5, 0x113C5,
+ 0x113C7, 0x113CA,
+ 0x113CC, 0x113CD,
+ 0x113D1, 0x113D1,
+ 0x113D3, 0x113D3,
0x11400, 0x11441,
0x11443, 0x11445,
0x11447, 0x1144A,
@@ -1297,6 +1347,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11680, 0x116B5,
0x116B8, 0x116B8,
0x116C0, 0x116C9,
+ 0x116D0, 0x116E3,
0x11700, 0x1171A,
0x1171D, 0x1172A,
0x11730, 0x11739,
@@ -1322,6 +1373,9 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11A50, 0x11A97,
0x11A9D, 0x11A9D,
0x11AB0, 0x11AF8,
+ 0x11B60, 0x11B67,
+ 0x11BC0, 0x11BE0,
+ 0x11BF0, 0x11BF9,
0x11C00, 0x11C08,
0x11C0A, 0x11C36,
0x11C38, 0x11C3E,
@@ -1346,6 +1400,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x11D93, 0x11D96,
0x11D98, 0x11D98,
0x11DA0, 0x11DA9,
+ 0x11DB0, 0x11DDB,
+ 0x11DE0, 0x11DE9,
0x11EE0, 0x11EF6,
0x11F00, 0x11F10,
0x11F12, 0x11F3A,
@@ -1358,7 +1414,10 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x12F90, 0x12FF0,
0x13000, 0x1342F,
0x13441, 0x13446,
+ 0x13460, 0x143FA,
0x14400, 0x14646,
+ 0x16100, 0x1612E,
+ 0x16130, 0x16139,
0x16800, 0x16A38,
0x16A40, 0x16A5E,
0x16A60, 0x16A69,
@@ -1370,16 +1429,20 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x16B50, 0x16B59,
0x16B63, 0x16B77,
0x16B7D, 0x16B8F,
+ 0x16D40, 0x16D6C,
+ 0x16D70, 0x16D79,
0x16E40, 0x16E7F,
+ 0x16EA0, 0x16EB8,
+ 0x16EBB, 0x16ED3,
0x16F00, 0x16F4A,
0x16F4F, 0x16F87,
0x16F8F, 0x16F9F,
0x16FE0, 0x16FE1,
0x16FE3, 0x16FE3,
- 0x16FF0, 0x16FF1,
- 0x17000, 0x187F7,
- 0x18800, 0x18CD5,
- 0x18D00, 0x18D08,
+ 0x16FF0, 0x16FF6,
+ 0x17000, 0x18CD5,
+ 0x18CFF, 0x18D1E,
+ 0x18D80, 0x18DF2,
0x1AFF0, 0x1AFF3,
0x1AFF5, 0x1AFFB,
0x1AFFD, 0x1AFFE,
@@ -1394,6 +1457,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1BC80, 0x1BC88,
0x1BC90, 0x1BC99,
0x1BC9E, 0x1BC9E,
+ 0x1CCF0, 0x1CCF9,
0x1D400, 0x1D454,
0x1D456, 0x1D49C,
0x1D49E, 0x1D49F,
@@ -1443,6 +1507,11 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1E2F0, 0x1E2F9,
0x1E4D0, 0x1E4EB,
0x1E4F0, 0x1E4F9,
+ 0x1E5D0, 0x1E5ED,
+ 0x1E5F0, 0x1E5FA,
+ 0x1E6C0, 0x1E6DE,
+ 0x1E6E0, 0x1E6F5,
+ 0x1E6FE, 0x1E6FF,
0x1E7E0, 0x1E7E6,
0x1E7E8, 0x1E7EB,
0x1E7ED, 0x1E7EE,
@@ -1490,16 +1559,16 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
0x1F170, 0x1F189,
0x1FBF0, 0x1FBF9,
0x20000, 0x2A6DF,
- 0x2A700, 0x2B739,
- 0x2B740, 0x2B81D,
- 0x2B820, 0x2CEA1,
+ 0x2A700, 0x2B81D,
+ 0x2B820, 0x2CEAD,
0x2CEB0, 0x2EBE0,
+ 0x2EBF0, 0x2EE5D,
0x2F800, 0x2FA1D,
0x30000, 0x3134A,
- 0x31350, 0x323AF,
+ 0x31350, 0x33479,
};
-#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1302
+#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1320
static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
0x100, 0x100,
0x102, 0x102,
@@ -1774,6 +1843,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x10C7, 0x10C7,
0x10CD, 0x10CD,
0x13A0, 0x13F5,
+ 0x1C89, 0x1C89,
0x1C90, 0x1CBA,
0x1CBD, 0x1CBF,
0x1E00, 0x1E00,
@@ -2103,9 +2173,15 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0xA7C2, 0xA7C2,
0xA7C4, 0xA7C7,
0xA7C9, 0xA7C9,
+ 0xA7CB, 0xA7CC,
+ 0xA7CE, 0xA7CE,
0xA7D0, 0xA7D0,
+ 0xA7D2, 0xA7D2,
+ 0xA7D4, 0xA7D4,
0xA7D6, 0xA7D6,
0xA7D8, 0xA7D8,
+ 0xA7DA, 0xA7DA,
+ 0xA7DC, 0xA7DC,
0xA7F5, 0xA7F5,
0xFF21, 0xFF3A,
0x10400, 0x10427,
@@ -2115,8 +2191,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
0x1058C, 0x10592,
0x10594, 0x10595,
0x10C80, 0x10CB2,
+ 0x10D50, 0x10D65,
0x118A0, 0x118BF,
0x16E40, 0x16E5F,
+ 0x16EA0, 0x16EB8,
0x1D400, 0x1D419,
0x1D434, 0x1D44D,
0x1D468, 0x1D481,
@@ -2304,6 +2382,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
*/
size_t
pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
@@ -2324,6 +2406,10 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
*/
size_t
pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
@@ -2344,6 +2430,10 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
*/
bool
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
@@ -2362,7 +2452,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
static pm_unicode_codepoint_t
pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
- if (b[0] < 0x80) {
+
+ if ((n > 0) && (b[0] < 0x80)) {
*width = 1;
return (pm_unicode_codepoint_t) b[0];
}
@@ -2401,6 +2492,10 @@ pm_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
static size_t
pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
size_t width;
pm_cesu_8_codepoint(b, n, &width);
return width;
@@ -2408,6 +2503,10 @@ pm_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
}
@@ -2424,6 +2523,10 @@ pm_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
@@ -2440,6 +2543,10 @@ pm_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
static bool
pm_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
if (*b < 0x80) {
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
}
@@ -3855,14 +3962,14 @@ static const uint8_t pm_encoding_windows_874_table[256] = {
};
#define PRISM_ENCODING_TABLE(name) \
- static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \
+ static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, ptrdiff_t n) { \
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT)); \
} \
- static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
+ static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, ptrdiff_t n) { \
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; \
} \
- static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \
- return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \
+ static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, ptrdiff_t n) { \
+ return ((n > 0) && (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT)); \
}
PRISM_ENCODING_TABLE(cp850)
@@ -3931,8 +4038,8 @@ PRISM_ENCODING_TABLE(windows_874)
* means that if the top bit is not set, the character is 1 byte long.
*/
static size_t
-pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return *b < 0x80 ? 1 : 0;
+pm_encoding_ascii_char_width(const uint8_t *b, ptrdiff_t n) {
+ return ((n > 0) && (*b < 0x80)) ? 1 : 0;
}
/**
@@ -3940,8 +4047,8 @@ pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
* alphabetical character.
*/
static size_t
-pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT);
+pm_encoding_ascii_alpha_char(const uint8_t *b, ptrdiff_t n) {
+ return (n > 0) ? (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) : 0;
}
/**
@@ -3951,7 +4058,7 @@ pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
*/
static size_t
pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
- return (*b < 0x80) ? pm_encoding_ascii_alpha_char(b, n) : 0;
+ return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alpha_char(b, n) : 0;
}
/**
@@ -3959,8 +4066,8 @@ pm_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
* alphanumeric character.
*/
static size_t
-pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
+pm_encoding_ascii_alnum_char(const uint8_t *b, ptrdiff_t n) {
+ return ((n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
}
/**
@@ -3970,7 +4077,7 @@ pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t
*/
static size_t
pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
- return (*b < 0x80) ? pm_encoding_ascii_alnum_char(b, n) : 0;
+ return ((n > 0) && (*b < 0x80)) ? pm_encoding_ascii_alnum_char(b, n) : 0;
}
/**
@@ -3978,8 +4085,8 @@ pm_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
* character.
*/
static bool
-pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
- return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
+pm_encoding_ascii_isupper_char(const uint8_t *b, ptrdiff_t n) {
+ return (n > 0) && (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT);
}
/**
@@ -3987,7 +4094,7 @@ pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_
* matter what the codepoint, so this function is shared between them.
*/
static size_t
-pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) {
+pm_encoding_single_char_width(PRISM_UNUSED const uint8_t *b, PRISM_UNUSED ptrdiff_t n) {
return 1;
}
@@ -3998,7 +4105,7 @@ pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATT
static size_t
pm_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4042,6 +4149,9 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
*/
static size_t
pm_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
+ if (n == 0) {
+ return 0;
+ }
// These are the single byte characters.
if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
return 1;
@@ -4105,7 +4215,7 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
*/
static bool
pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
- return (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
+ return (n > 0) && (*b < 0x80) && pm_encoding_ascii_isupper_char(b, n);
}
/**
@@ -4115,7 +4225,7 @@ pm_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4134,7 +4244,7 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters
- if (*b <= 0x80) {
+ if ((n > 0) && (*b <= 0x80)) {
return 1;
}
@@ -4153,7 +4263,7 @@ pm_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the 1 byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4196,7 +4306,7 @@ pm_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4215,7 +4325,7 @@ pm_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4239,7 +4349,7 @@ pm_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the 1 byte characters.
- if (*b < 0x80) {
+ if ((n > 0) && (*b < 0x80)) {
return 1;
}
@@ -4263,7 +4373,7 @@ pm_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
static size_t
pm_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
// These are the single byte characters.
- if (*b <= 0x80) {
+ if ((n > 0) && (*b <= 0x80)) {
return 1;
}
diff --git a/prism/excludes.h b/prism/excludes.h
new file mode 100644
index 0000000000..8600622f63
--- /dev/null
+++ b/prism/excludes.h
@@ -0,0 +1,29 @@
+/**
+ * @file excludes.h
+ *
+ * A header file that defines macros to exclude certain features of the prism
+ * library. This is useful for reducing the size of the library when certain
+ * features are not needed.
+ */
+#ifndef PRISM_EXCLUDES_H
+#define PRISM_EXCLUDES_H
+
+/**
+ * If PRISM_BUILD_MINIMAL is defined, then we're going to define every possible
+ * switch that will turn off certain features of prism.
+ */
+#ifdef PRISM_BUILD_MINIMAL
+ /** Exclude the serialization API. */
+ #define PRISM_EXCLUDE_SERIALIZATION
+
+ /** Exclude the JSON serialization API. */
+ #define PRISM_EXCLUDE_JSON
+
+ /** Exclude the prettyprint API. */
+ #define PRISM_EXCLUDE_PRETTYPRINT
+
+ /** Exclude the full set of encodings, using the minimal only. */
+ #define PRISM_ENCODING_EXCLUDE_FULL
+#endif
+
+#endif
diff --git a/prism/extension.c b/prism/extension.c
index 1533ca7bb3..27df8dac50 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -4,6 +4,8 @@
#include <ruby/win32.h>
#endif
+#include <errno.h>
+
// NOTE: this file should contain only bindings. All non-trivial logic should be
// in libprism so it can be shared its the various callers.
@@ -25,6 +27,7 @@ VALUE rb_cPrismLexResult;
VALUE rb_cPrismParseLexResult;
VALUE rb_cPrismStringQuery;
VALUE rb_cPrismScope;
+VALUE rb_cPrismCurrentVersionError;
VALUE rb_cPrismDebugEncoding;
@@ -63,18 +66,6 @@ check_string(VALUE value) {
return RSTRING_PTR(value);
}
-/**
- * Load the contents and size of the given string into the given pm_string_t.
- */
-static void
-input_load_string(pm_string_t *input, VALUE string) {
- // Check if the string is a string. If it's not, then raise a type error.
- if (!RB_TYPE_P(string, T_STRING)) {
- rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (expected String)", rb_obj_class(string));
- }
-
- pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
-}
/******************************************************************************/
/* Building C options from Ruby options */
@@ -147,10 +138,8 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
// Initialize the scope array.
size_t locals_count = RARRAY_LEN(locals);
- pm_options_scope_t *options_scope = &options->scopes[scope_index];
- if (!pm_options_scope_init(options_scope, locals_count)) {
- rb_raise(rb_eNoMemError, "failed to allocate memory");
- }
+ pm_options_scope_t *options_scope = pm_options_scope_mut(options, scope_index);
+ pm_options_scope_init(options_scope, locals_count);
// Iterate over the locals and add them to the scope.
for (size_t local_index = 0; local_index < locals_count; local_index++) {
@@ -163,7 +152,7 @@ build_options_scopes(pm_options_t *options, VALUE scopes) {
}
// Add the local to the scope.
- pm_string_t *scope_local = &options_scope->locals[local_index];
+ pm_string_t *scope_local = pm_options_scope_local_mut(options_scope, local_index);
const char *name = rb_id2name(SYM2ID(local));
pm_string_constant_init(scope_local, name, strlen(name));
}
@@ -199,7 +188,21 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
if (!NIL_P(value)) {
const char *version = check_string(value);
- if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
+ if (RSTRING_LEN(value) == 7 && strncmp(version, "current", 7) == 0) {
+ if (!pm_options_version_set(options, ruby_version, 3)) {
+ rb_exc_raise(rb_exc_new_cstr(rb_cPrismCurrentVersionError, ruby_version));
+ }
+ } else if (RSTRING_LEN(value) == 7 && strncmp(version, "nearest", 7) == 0) {
+ if (!pm_options_version_set(options, ruby_version, 3)) {
+ // Prism doesn't know this specific version. Is it lower?
+ if (ruby_version[0] < '3' || (ruby_version[0] == '3' && ruby_version[2] < '3')) {
+ pm_options_version_set_lowest(options);
+ } else {
+ // Must be higher.
+ pm_options_version_set_highest(options);
+ }
+ }
+ } else if (!pm_options_version_set(options, version, RSTRING_LEN(value))) {
rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value);
}
}
@@ -263,7 +266,7 @@ build_options(VALUE argument) {
*/
static void
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
- options->line = 1; // default
+ pm_options_line_set(options, 1); /* default */
if (!NIL_P(keywords)) {
struct build_options_data data = { .options = options, .keywords = keywords };
@@ -291,36 +294,46 @@ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
/**
* Read options for methods that look like (source, **options).
*/
-static void
-string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
+static VALUE
+string_options(int argc, VALUE *argv, pm_options_t *options) {
VALUE string;
VALUE keywords;
rb_scan_args(argc, argv, "1:", &string, &keywords);
+ if (!RB_TYPE_P(string, T_STRING)) {
+ pm_options_free(options);
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
+ }
+
extract_options(options, Qnil, keywords);
- input_load_string(input, string);
+ return string;
}
/**
* Read options for methods that look like (filepath, **options).
*/
-static void
-file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, VALUE *encoded_filepath) {
+static pm_source_t *
+file_options(int argc, VALUE *argv, pm_options_t *options, VALUE *encoded_filepath) {
VALUE filepath;
VALUE keywords;
rb_scan_args(argc, argv, "1:", &filepath, &keywords);
- Check_Type(filepath, T_STRING);
+ if (!RB_TYPE_P(filepath, T_STRING)) {
+ pm_options_free(options);
+ rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
+ }
+
*encoded_filepath = rb_str_encode_ospath(filepath);
extract_options(options, *encoded_filepath, keywords);
- const char *source = (const char *) pm_string_source(&options->filepath);
- pm_string_init_result_t result;
+ const char *source = (const char *) pm_string_source(pm_options_filepath(options));
+ pm_source_init_result_t result;
+ pm_source_t *pm_src = pm_source_file_new(source, &result);
- switch (result = pm_string_file_init(input, source)) {
- case PM_STRING_INIT_SUCCESS:
+ switch (result) {
+ case PM_SOURCE_INIT_SUCCESS:
break;
- case PM_STRING_INIT_ERROR_GENERIC: {
+ case PM_SOURCE_INIT_ERROR_GENERIC: {
pm_options_free(options);
#ifdef _WIN32
@@ -332,7 +345,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V
rb_syserr_fail(e, source);
break;
}
- case PM_STRING_INIT_ERROR_DIRECTORY:
+ case PM_SOURCE_INIT_ERROR_DIRECTORY:
pm_options_free(options);
rb_syserr_fail(EISDIR, source);
break;
@@ -341,6 +354,8 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V
rb_raise(rb_eRuntimeError, "Unknown error (%d) initializing file: %s", result, source);
break;
}
+
+ return pm_src;
}
#ifndef PRISM_EXCLUDE_SERIALIZATION
@@ -353,77 +368,82 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options, V
* Dump the AST corresponding to the given input to a string.
*/
static VALUE
-dump_input(pm_string_t *input, const pm_options_t *options) {
- pm_buffer_t buffer;
- if (!pm_buffer_init(&buffer)) {
+dump_input(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+ pm_buffer_t *buffer = pm_buffer_new();
+ if (!buffer) {
rb_raise(rb_eNoMemError, "failed to allocate memory");
}
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
- pm_node_t *node = pm_parse(&parser);
- pm_serialize(&parser, node, &buffer);
+ pm_node_t *node = pm_parse(parser);
+ pm_serialize(parser, node, buffer);
- VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer));
- pm_node_destroy(&parser, node);
- pm_buffer_free(&buffer);
- pm_parser_free(&parser);
+ VALUE result = rb_str_new(pm_buffer_value(buffer), pm_buffer_length(buffer));
+ pm_buffer_free(buffer);
+ pm_parser_free(parser);
+ pm_arena_free(arena);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::dump(source, **options) -> String
+ * dump(source, **options) -> String
*
* Dump the AST corresponding to the given string to a string. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
dump(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
- string_options(argc, argv, &input, &options);
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
+
+ const uint8_t *source = (const uint8_t *) RSTRING_PTR(string);
+ size_t length = RSTRING_LEN(string);
#ifdef PRISM_BUILD_DEBUG
- size_t length = pm_string_length(&input);
char* dup = xmalloc(length);
- memcpy(dup, pm_string_source(&input), length);
- pm_string_constant_init(&input, dup, length);
+ memcpy(dup, source, length);
+ source = (const uint8_t *) dup;
#endif
- VALUE value = dump_input(&input, &options);
- if (options.freeze) rb_obj_freeze(value);
+ VALUE value = dump_input(source, length, options);
+ if (pm_options_freeze(options)) rb_obj_freeze(value);
#ifdef PRISM_BUILD_DEBUG
+#ifdef xfree_sized
+ xfree_sized(dup, length);
+#else
xfree(dup);
#endif
+#endif
- pm_string_free(&input);
- pm_options_free(&options);
+ pm_options_free(options);
return value;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::dump_file(filepath, **options) -> String
+ * dump_file(filepath, **options) -> String
*
* Dump the AST corresponding to the given file to a string. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
dump_file(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- VALUE value = dump_input(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE value = dump_input(pm_source_source(src), pm_source_length(src), options);
+ pm_source_free(src);
+ pm_options_free(options);
return value;
}
@@ -449,42 +469,49 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free
* Create a new Location instance from the given parser and bounds.
*/
static inline VALUE
-parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
- VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
+parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) {
+ VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
}
/**
* Create a new Location instance from the given parser and location.
*/
-#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
- parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))
+#define PARSER_LOCATION(source, freeze, location) \
+ parser_location(source, freeze, location.start, location.length)
/**
* Build a new Comment instance from the given parser and comment.
*/
static inline VALUE
-parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
- VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
- VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
+parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) {
+ VALUE argv[] = { PARSER_LOCATION(source, freeze, pm_comment_location(comment)) };
+ VALUE type = (pm_comment_type(comment) == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
return rb_class_new_instance_freeze(1, argv, type, freeze);
}
+typedef struct {
+ VALUE comments;
+ VALUE source;
+ bool freeze;
+} parser_comments_each_data_t;
+
+static void
+parser_comments_each(const pm_comment_t *comment, void *data) {
+ parser_comments_each_data_t *each_data = (parser_comments_each_data_t *) data;
+ VALUE value = parser_comment(each_data->source, each_data->freeze, comment);
+ rb_ary_push(each_data->comments, value);
+}
+
/**
* Extract the comments out of the parser into an array.
*/
static VALUE
parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
- VALUE comments = rb_ary_new_capa(parser->comment_list.size);
-
- for (
- const pm_comment_t *comment = (const pm_comment_t *) parser->comment_list.head;
- comment != NULL;
- comment = (const pm_comment_t *) comment->node.next
- ) {
- VALUE value = parser_comment(parser, source, freeze, comment);
- rb_ary_push(comments, value);
- }
+ VALUE comments = rb_ary_new_capa(pm_parser_comments_size(parser));
+
+ parser_comments_each_data_t each_data = { comments, source, freeze };
+ pm_parser_comments_each(parser, parser_comments_each, &each_data);
if (freeze) rb_obj_freeze(comments);
return comments;
@@ -494,28 +521,39 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
* Build a new MagicComment instance from the given parser and magic comment.
*/
static inline VALUE
-parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
- VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
- VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
+parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
+ pm_location_t key = pm_magic_comment_key(magic_comment);
+ pm_location_t value = pm_magic_comment_value(magic_comment);
+
+ VALUE key_loc = parser_location(source, freeze, key.start, key.length);
+ VALUE value_loc = parser_location(source, freeze, value.start, value.length);
+
VALUE argv[] = { key_loc, value_loc };
return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
}
+typedef struct {
+ VALUE magic_comments;
+ VALUE source;
+ bool freeze;
+} parser_magic_comments_each_data_t;
+
+static void
+parser_magic_comments_each(const pm_magic_comment_t *magic_comment, void *data) {
+ parser_magic_comments_each_data_t *each_data = (parser_magic_comments_each_data_t *) data;
+ VALUE value = parser_magic_comment(each_data->source, each_data->freeze, magic_comment);
+ rb_ary_push(each_data->magic_comments, value);
+}
+
/**
* Extract the magic comments out of the parser into an array.
*/
static VALUE
parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
- VALUE magic_comments = rb_ary_new_capa(parser->magic_comment_list.size);
-
- for (
- const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) parser->magic_comment_list.head;
- magic_comment != NULL;
- magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
- ) {
- VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
- rb_ary_push(magic_comments, value);
- }
+ VALUE magic_comments = rb_ary_new_capa(pm_parser_magic_comments_size(parser));
+
+ parser_magic_comments_each_data_t each_data = { magic_comments, source, freeze };
+ pm_parser_magic_comments_each(parser, parser_magic_comments_each, &each_data);
if (freeze) rb_obj_freeze(magic_comments);
return magic_comments;
@@ -527,85 +565,109 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
*/
static VALUE
parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
- if (parser->data_loc.end == NULL) {
+ const pm_location_t *data_loc = pm_parser_data_loc(parser);
+
+ if (data_loc->length == 0) {
return Qnil;
} else {
- return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
+ return parser_location(source, freeze, data_loc->start, data_loc->length);
}
}
+typedef struct {
+ VALUE errors;
+ rb_encoding *encoding;
+ VALUE source;
+ bool freeze;
+} parser_errors_each_data_t;
+
+static void
+parser_errors_each(const pm_diagnostic_t *diagnostic, void *data) {
+ parser_errors_each_data_t *each_data = (parser_errors_each_data_t *) data;
+
+ VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(diagnostic)));
+ VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), each_data->encoding));
+ VALUE location = PARSER_LOCATION(each_data->source, each_data->freeze, pm_diagnostic_location(diagnostic));
+
+ pm_error_level_t error_level = pm_diagnostic_error_level(diagnostic);
+ VALUE level = Qnil;
+
+ switch (error_level) {
+ case PM_ERROR_LEVEL_SYNTAX:
+ level = ID2SYM(rb_intern("syntax"));
+ break;
+ case PM_ERROR_LEVEL_ARGUMENT:
+ level = ID2SYM(rb_intern("argument"));
+ break;
+ case PM_ERROR_LEVEL_LOAD:
+ level = ID2SYM(rb_intern("load"));
+ break;
+ default:
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error_level);
+ }
+
+ VALUE argv[] = { type, message, location, level };
+ VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, each_data->freeze);
+ rb_ary_push(each_data->errors, value);
+}
+
/**
* Extract the errors out of the parser into an array.
*/
static VALUE
parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
- VALUE errors = rb_ary_new_capa(parser->error_list.size);
-
- for (
- const pm_diagnostic_t *error = (const pm_diagnostic_t *) parser->error_list.head;
- error != NULL;
- error = (const pm_diagnostic_t *) error->node.next
- ) {
- VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
- VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
- VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);
-
- VALUE level = Qnil;
- switch (error->level) {
- case PM_ERROR_LEVEL_SYNTAX:
- level = ID2SYM(rb_intern("syntax"));
- break;
- case PM_ERROR_LEVEL_ARGUMENT:
- level = ID2SYM(rb_intern("argument"));
- break;
- case PM_ERROR_LEVEL_LOAD:
- level = ID2SYM(rb_intern("load"));
- break;
- default:
- rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level);
- }
+ VALUE errors = rb_ary_new_capa(pm_parser_errors_size(parser));
- VALUE argv[] = { type, message, location, level };
- VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseError, freeze);
- rb_ary_push(errors, value);
- }
+ parser_errors_each_data_t each_data = { errors, encoding, source, freeze };
+ pm_parser_errors_each(parser, parser_errors_each, &each_data);
if (freeze) rb_obj_freeze(errors);
return errors;
}
+typedef struct {
+ VALUE warnings;
+ rb_encoding *encoding;
+ VALUE source;
+ bool freeze;
+} parser_warnings_each_data_t;
+
+static void
+parser_warnings_each(const pm_diagnostic_t *diagnostic, void *data) {
+ parser_warnings_each_data_t *each_data = (parser_warnings_each_data_t *) data;
+
+ VALUE type = ID2SYM(rb_intern(pm_diagnostic_type(diagnostic)));
+ VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(pm_diagnostic_message(diagnostic), each_data->encoding));
+ VALUE location = PARSER_LOCATION(each_data->source, each_data->freeze, pm_diagnostic_location(diagnostic));
+
+ pm_warning_level_t warning_level = pm_diagnostic_warning_level(diagnostic);
+ VALUE level = Qnil;
+
+ switch (warning_level) {
+ case PM_WARNING_LEVEL_DEFAULT:
+ level = ID2SYM(rb_intern("default"));
+ break;
+ case PM_WARNING_LEVEL_VERBOSE:
+ level = ID2SYM(rb_intern("verbose"));
+ break;
+ default:
+ rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning_level);
+ }
+
+ VALUE argv[] = { type, message, location, level };
+ VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, each_data->freeze);
+ rb_ary_push(each_data->warnings, value);
+}
+
/**
* Extract the warnings out of the parser into an array.
*/
static VALUE
parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bool freeze) {
- VALUE warnings = rb_ary_new_capa(parser->warning_list.size);
-
- for (
- const pm_diagnostic_t *warning = (const pm_diagnostic_t *) parser->warning_list.head;
- warning != NULL;
- warning = (const pm_diagnostic_t *) warning->node.next
- ) {
- VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
- VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
- VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);
-
- VALUE level = Qnil;
- switch (warning->level) {
- case PM_WARNING_LEVEL_DEFAULT:
- level = ID2SYM(rb_intern("default"));
- break;
- case PM_WARNING_LEVEL_VERBOSE:
- level = ID2SYM(rb_intern("verbose"));
- break;
- default:
- rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level);
- }
+ VALUE warnings = rb_ary_new_capa(pm_parser_warnings_size(parser));
- VALUE argv[] = { type, message, location, level };
- VALUE value = rb_class_new_instance_freeze(4, argv, rb_cPrismParseWarning, freeze);
- rb_ary_push(warnings, value);
- }
+ parser_warnings_each_data_t each_data = { warnings, encoding, source, freeze };
+ pm_parser_warnings_each(parser, parser_warnings_each, &each_data);
if (freeze) rb_obj_freeze(warnings);
return warnings;
@@ -623,10 +685,11 @@ parse_result_create(VALUE class, const pm_parser_t *parser, VALUE value, rb_enco
parser_data_loc(parser, source, freeze),
parser_errors(parser, encoding, source, freeze),
parser_warnings(parser, encoding, source, freeze),
+ pm_parser_continuable(parser) ? Qtrue : Qfalse,
source
};
- return rb_class_new_instance_freeze(7, result_argv, class, freeze);
+ return rb_class_new_instance_freeze(8, result_argv, class, freeze);
}
/******************************************************************************/
@@ -651,11 +714,11 @@ typedef struct {
* onto the tokens array.
*/
static void
-parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
- parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
+parse_lex_token(pm_parser_t *parser, pm_token_t *token, void *data) {
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) data;
VALUE value = pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source, parse_lex_data->freeze);
- VALUE yields = rb_assoc_new(value, INT2FIX(parser->lex_state));
+ VALUE yields = rb_assoc_new(value, INT2FIX(pm_parser_lex_state(parser)));
if (parse_lex_data->freeze) {
rb_obj_freeze(value);
@@ -672,8 +735,8 @@ parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) {
*/
static void
parse_lex_encoding_changed_callback(pm_parser_t *parser) {
- parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
- parse_lex_data->encoding = rb_enc_find(parser->encoding->name);
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) pm_parser_lex_callback_data(parser);
+ parse_lex_data->encoding = rb_enc_find(pm_parser_encoding_name(parser));
// Since the encoding changed, we need to go back and change the encoding of
// the tokens that were already lexed. This is only going to end up being
@@ -718,43 +781,38 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) {
* the nodes and tokens.
*/
static VALUE
-parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) {
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
- pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
+parse_lex_input(const uint8_t *input, size_t input_length, const pm_options_t *options, bool return_nodes) {
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
+ pm_parser_encoding_changed_callback_set(parser, parse_lex_encoding_changed_callback);
- VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input));
- VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
- VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(parser.start_line), offsets);
+ VALUE source_string = rb_str_new((const char *) input, input_length);
+ VALUE offsets = rb_ary_new_capa(pm_parser_line_offsets(parser)->size);
+ VALUE source = rb_funcall(rb_cPrismSource, rb_id_source_for, 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets);
parse_lex_data_t parse_lex_data = {
.source = source,
.tokens = rb_ary_new(),
- .encoding = rb_utf8_encoding(),
- .freeze = options->freeze,
+ .encoding = rb_enc_find(pm_parser_encoding_name(parser)),
+ .freeze = pm_options_freeze(options),
};
parse_lex_data_t *data = &parse_lex_data;
- pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
- .data = (void *) data,
- .callback = parse_lex_token,
- };
+ pm_parser_lex_callback_set(parser, parse_lex_token, data);
- parser.lex_callback = &lex_callback;
- pm_node_t *node = pm_parse(&parser);
+ pm_node_t *node = pm_parse(parser);
- // Here we need to update the Source object to have the correct
- // encoding for the source string and the correct newline offsets.
- // We do it here because we've already created the Source object and given
- // it over to all of the tokens, and both of these are only set after pm_parse().
- rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+ /* Update the Source object with the correct encoding and line offsets,
+ * which are only available after pm_parse() completes. */
+ rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
rb_enc_associate(source_string, encoding);
- for (size_t index = 0; index < parser.newline_list.size; index++) {
- rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index]));
+ const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
+ for (size_t index = 0; index < line_offsets->size; index++) {
+ rb_ary_store(offsets, (long) index, ULONG2NUM(line_offsets->offsets[index]));
}
- if (options->freeze) {
+ if (pm_options_freeze(options)) {
rb_obj_freeze(source_string);
rb_obj_freeze(offsets);
rb_obj_freeze(source);
@@ -764,58 +822,57 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
VALUE result;
if (return_nodes) {
VALUE value = rb_ary_new_capa(2);
- rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source, options->freeze));
+ rb_ary_push(value, pm_ast_new(parser, node, parse_lex_data.encoding, source, pm_options_freeze(options)));
rb_ary_push(value, parse_lex_data.tokens);
- if (options->freeze) rb_obj_freeze(value);
- result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source, options->freeze);
+ if (pm_options_freeze(options)) rb_obj_freeze(value);
+ result = parse_result_create(rb_cPrismParseLexResult, parser, value, parse_lex_data.encoding, source, pm_options_freeze(options));
} else {
- result = parse_result_create(rb_cPrismLexResult, &parser, parse_lex_data.tokens, parse_lex_data.encoding, source, options->freeze);
+ result = parse_result_create(rb_cPrismLexResult, parser, parse_lex_data.tokens, parse_lex_data.encoding, source, pm_options_freeze(options));
}
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
+ pm_parser_free(parser);
+ pm_arena_free(arena);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::lex(source, **options) -> LexResult
+ * lex(source, **options) -> LexResult
*
* Return a LexResult instance that contains an array of Token instances
- * corresponding to the given string. For supported options, see Prism::parse.
+ * corresponding to the given string. For supported options, see Prism.parse.
*/
static VALUE
lex(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
- string_options(argc, argv, &input, &options);
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
- VALUE result = parse_lex_input(&input, &options, false);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE result = parse_lex_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options, false);
+ pm_options_free(options);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::lex_file(filepath, **options) -> LexResult
+ * lex_file(filepath, **options) -> LexResult
*
* Return a LexResult instance that contains an array of Token instances
- * corresponding to the given file. For supported options, see Prism::parse.
+ * corresponding to the given file. For supported options, see Prism.parse.
*/
static VALUE
lex_file(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- VALUE value = parse_lex_input(&input, &options, false);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE value = parse_lex_input(pm_source_source(src), pm_source_length(src), options, false);
+ pm_source_free(src);
+ pm_options_free(options);
return value;
}
@@ -828,30 +885,32 @@ lex_file(int argc, VALUE *argv, VALUE self) {
* Parse the given input and return a ParseResult instance.
*/
static VALUE
-parse_input(pm_string_t *input, const pm_options_t *options) {
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+parse_input(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
- pm_node_t *node = pm_parse(&parser);
- rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+ pm_node_t *node = pm_parse(parser);
+ rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
- VALUE source = pm_source_new(&parser, encoding, options->freeze);
- VALUE value = pm_ast_new(&parser, node, encoding, source, options->freeze);
- VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options->freeze);
+ bool freeze = pm_options_freeze(options);
+ VALUE source = pm_source_new(parser, encoding, freeze);
+ VALUE value = pm_ast_new(parser, node, encoding, source, freeze);
+ VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, freeze);
- if (options->freeze) {
+ if (freeze) {
rb_obj_freeze(source);
}
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
+ pm_parser_free(parser);
+ pm_arena_free(arena);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse(source, **options) -> ParseResult
+ * parse(source, **options) -> ParseResult
*
* Parse the given string and return a ParseResult instance. The options that
* are supported are:
@@ -888,51 +947,57 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
* version of Ruby syntax (which you can trigger with `nil` or
* `"latest"`). You may also restrict the syntax to a specific version of
* Ruby, e.g., with `"3.3.0"`. To parse with the same syntax version that
- * the current Ruby is running use `version: RUBY_VERSION`. Raises
- * ArgumentError if the version is not currently supported by Prism.
+ * the current Ruby is running use `version: "current"`. To parse with the
+ * nearest version to the current Ruby that is running, use
+ * `version: "nearest"`. Raises ArgumentError if the version is not
+ * currently supported by Prism.
*/
static VALUE
parse(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
- string_options(argc, argv, &input, &options);
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
+
+ const uint8_t *source = (const uint8_t *) RSTRING_PTR(string);
+ size_t length = RSTRING_LEN(string);
#ifdef PRISM_BUILD_DEBUG
- size_t length = pm_string_length(&input);
char* dup = xmalloc(length);
- memcpy(dup, pm_string_source(&input), length);
- pm_string_constant_init(&input, dup, length);
+ memcpy(dup, source, length);
+ source = (const uint8_t *) dup;
#endif
- VALUE value = parse_input(&input, &options);
+ VALUE value = parse_input(source, length, options);
#ifdef PRISM_BUILD_DEBUG
+#ifdef xfree_sized
+ xfree_sized(dup, length);
+#else
xfree(dup);
#endif
+#endif
- pm_string_free(&input);
- pm_options_free(&options);
+ pm_options_free(options);
return value;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_file(filepath, **options) -> ParseResult
+ * parse_file(filepath, **options) -> ParseResult
*
* Parse the given file and return a ParseResult instance. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
parse_file(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- VALUE value = parse_input(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE value = parse_input(pm_source_source(src), pm_source_length(src), options);
+ pm_source_free(src);
+ pm_options_free(options);
return value;
}
@@ -941,59 +1006,66 @@ parse_file(int argc, VALUE *argv, VALUE self) {
* Parse the given input and return nothing.
*/
static void
-profile_input(pm_string_t *input, const pm_options_t *options) {
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+profile_input(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
- pm_node_t *node = pm_parse(&parser);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
+ pm_parse(parser);
+ pm_parser_free(parser);
+ pm_arena_free(arena);
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::profile(source, **options) -> nil
+ * profile(source, **options) -> nil
*
* Parse the given string and return nothing. This method is meant to allow
* profilers to avoid the overhead of reifying the AST to Ruby. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
profile(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
- string_options(argc, argv, &input, &options);
- profile_input(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ profile_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options);
+ pm_options_free(options);
return Qnil;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::profile_file(filepath, **options) -> nil
+ * profile_file(filepath, **options) -> nil
*
* Parse the given file and return nothing. This method is meant to allow
* profilers to avoid the overhead of reifying the AST to Ruby. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
profile_file(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- profile_input(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ profile_input(pm_source_source(src), pm_source_length(src), options);
+ pm_source_free(src);
+ pm_options_free(options);
return Qnil;
}
+static int
+parse_stream_eof(void *stream) {
+ if (rb_funcall((VALUE) stream, rb_intern("eof?"), 0)) {
+ return 1;
+ }
+ return 0;
+}
+
/**
* An implementation of fgets that is suitable for use with Ruby IO objects.
*/
@@ -1016,11 +1088,12 @@ parse_stream_fgets(char *string, int size, void *stream) {
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_stream(stream, **options) -> ParseResult
+ * parse_stream(stream, **options) -> ParseResult
*
* Parse the given object that responds to `gets` and return a ParseResult
- * instance. The options that are supported are the same as Prism::parse.
+ * instance. The options that are supported are the same as Prism.parse.
*/
static VALUE
parse_stream(int argc, VALUE *argv, VALUE self) {
@@ -1028,22 +1101,24 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
VALUE keywords;
rb_scan_args(argc, argv, "1:", &stream, &keywords);
- pm_options_t options = { 0 };
- extract_options(&options, Qnil, keywords);
+ pm_options_t *options = pm_options_new();
+ extract_options(options, Qnil, keywords);
- pm_parser_t parser;
- pm_buffer_t buffer;
+ pm_source_t *src = pm_source_stream_new((void *) stream, parse_stream_fgets, parse_stream_eof);
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser;
- pm_node_t *node = pm_parse_stream(&parser, &buffer, (void *) stream, parse_stream_fgets, &options);
- rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+ pm_node_t *node = pm_parse_stream(&parser, arena, src, options);
+ rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
- VALUE source = pm_source_new(&parser, encoding, options.freeze);
- VALUE value = pm_ast_new(&parser, node, encoding, source, options.freeze);
- VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source, options.freeze);
+ VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options));
+ VALUE value = pm_ast_new(parser, node, encoding, source, pm_options_freeze(options));
+ VALUE result = parse_result_create(rb_cPrismParseResult, parser, value, encoding, source, pm_options_freeze(options));
- pm_node_destroy(&parser, node);
- pm_buffer_free(&buffer);
- pm_parser_free(&parser);
+ pm_source_free(src);
+ pm_parser_free(parser);
+ pm_arena_free(arena);
+ pm_options_free(options);
return result;
}
@@ -1052,116 +1127,114 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
* Parse the given input and return an array of Comment objects.
*/
static VALUE
-parse_input_comments(pm_string_t *input, const pm_options_t *options) {
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+parse_input_comments(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
- pm_node_t *node = pm_parse(&parser);
- rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+ pm_parse(parser);
+ rb_encoding *encoding = rb_enc_find(pm_parser_encoding_name(parser));
- VALUE source = pm_source_new(&parser, encoding, options->freeze);
- VALUE comments = parser_comments(&parser, source, options->freeze);
+ VALUE source = pm_source_new(parser, encoding, pm_options_freeze(options));
+ VALUE comments = parser_comments(parser, source, pm_options_freeze(options));
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
+ pm_parser_free(parser);
+ pm_arena_free(arena);
return comments;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_comments(source, **options) -> Array
+ * parse_comments(source, **options) -> Array
*
* Parse the given string and return an array of Comment objects. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
parse_comments(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
- string_options(argc, argv, &input, &options);
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
- VALUE result = parse_input_comments(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE result = parse_input_comments((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options);
+ pm_options_free(options);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_file_comments(filepath, **options) -> Array
+ * parse_file_comments(filepath, **options) -> Array
*
* Parse the given file and return an array of Comment objects. For supported
- * options, see Prism::parse.
+ * options, see Prism.parse.
*/
static VALUE
parse_file_comments(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- VALUE value = parse_input_comments(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE value = parse_input_comments(pm_source_source(src), pm_source_length(src), options);
+ pm_source_free(src);
+ pm_options_free(options);
return value;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_lex(source, **options) -> ParseLexResult
+ * parse_lex(source, **options) -> ParseLexResult
*
* Parse the given string and return a ParseLexResult instance that contains a
* 2-element array, where the first element is the AST and the second element is
* an array of Token instances.
*
* This API is only meant to be used in the case where you need both the AST and
- * the tokens. If you only need one or the other, use either Prism::parse or
- * Prism::lex.
+ * the tokens. If you only need one or the other, use either Prism.parse or
+ * Prism.lex.
*
- * For supported options, see Prism::parse.
+ * For supported options, see Prism.parse.
*/
static VALUE
parse_lex(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
- string_options(argc, argv, &input, &options);
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
- VALUE value = parse_lex_input(&input, &options, true);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE value = parse_lex_input((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options, true);
+ pm_options_free(options);
return value;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_lex_file(filepath, **options) -> ParseLexResult
+ * parse_lex_file(filepath, **options) -> ParseLexResult
*
* Parse the given file and return a ParseLexResult instance that contains a
* 2-element array, where the first element is the AST and the second element is
* an array of Token instances.
*
* This API is only meant to be used in the case where you need both the AST and
- * the tokens. If you only need one or the other, use either Prism::parse_file
- * or Prism::lex_file.
+ * the tokens. If you only need one or the other, use either Prism.parse_file
+ * or Prism.lex_file.
*
- * For supported options, see Prism::parse.
+ * For supported options, see Prism.parse.
*/
static VALUE
parse_lex_file(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- VALUE value = parse_lex_input(&input, &options, true);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE value = parse_lex_input(pm_source_source(src), pm_source_length(src), options, true);
+ pm_source_free(src);
+ pm_options_free(options);
return value;
}
@@ -1170,45 +1243,45 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) {
* Parse the given input and return true if it parses without errors.
*/
static VALUE
-parse_input_success_p(pm_string_t *input, const pm_options_t *options) {
- pm_parser_t parser;
- pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options);
+parse_input_success_p(const uint8_t *input, size_t input_length, const pm_options_t *options) {
+ pm_arena_t *arena = pm_arena_new();
+ pm_parser_t *parser = pm_parser_new(arena, input, input_length, options);
- pm_node_t *node = pm_parse(&parser);
- pm_node_destroy(&parser, node);
+ pm_parse(parser);
- VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse;
- pm_parser_free(&parser);
+ VALUE result = pm_parser_errors_size(parser) == 0 ? Qtrue : Qfalse;
+ pm_parser_free(parser);
+ pm_arena_free(arena);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_success?(source, **options) -> bool
+ * parse_success?(source, **options) -> bool
*
* Parse the given string and return true if it parses without errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
*/
static VALUE
parse_success_p(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
- string_options(argc, argv, &input, &options);
+ pm_options_t *options = pm_options_new();
+ VALUE string = string_options(argc, argv, options);
- VALUE result = parse_input_success_p(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE result = parse_input_success_p((const uint8_t *) RSTRING_PTR(string), RSTRING_LEN(string), options);
+ pm_options_free(options);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_failure?(source, **options) -> bool
+ * parse_failure?(source, **options) -> bool
*
* Parse the given string and return true if it parses with errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
*/
static VALUE
parse_failure_p(int argc, VALUE *argv, VALUE self) {
@@ -1216,33 +1289,34 @@ parse_failure_p(int argc, VALUE *argv, VALUE self) {
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_file_success?(filepath, **options) -> bool
+ * parse_file_success?(filepath, **options) -> bool
*
* Parse the given file and return true if it parses without errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
*/
static VALUE
parse_file_success_p(int argc, VALUE *argv, VALUE self) {
- pm_string_t input;
- pm_options_t options = { 0 };
+ pm_options_t *options = pm_options_new();
VALUE encoded_filepath;
- file_options(argc, argv, &input, &options, &encoded_filepath);
+ pm_source_t *src = file_options(argc, argv, options, &encoded_filepath);
- VALUE result = parse_input_success_p(&input, &options);
- pm_string_free(&input);
- pm_options_free(&options);
+ VALUE result = parse_input_success_p(pm_source_source(src), pm_source_length(src), options);
+ pm_source_free(src);
+ pm_options_free(options);
return result;
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::parse_file_failure?(filepath, **options) -> bool
+ * parse_file_failure?(filepath, **options) -> bool
*
* Parse the given file and return true if it parses with errors. For
- * supported options, see Prism::parse.
+ * supported options, see Prism.parse.
*/
static VALUE
parse_file_failure_p(int argc, VALUE *argv, VALUE self) {
@@ -1272,8 +1346,9 @@ string_query(pm_string_query_t result) {
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::StringQuery::local?(string) -> bool
+ * local?(string) -> bool
*
* Returns true if the string constitutes a valid local variable name. Note that
* this means the names that can be set through Binding#local_variable_set, not
@@ -1286,8 +1361,9 @@ string_query_local_p(VALUE self, VALUE string) {
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::StringQuery::constant?(string) -> bool
+ * constant?(string) -> bool
*
* Returns true if the string constitutes a valid constant name. Note that this
* means the names that can be set through Module#const_set, not necessarily the
@@ -1300,8 +1376,9 @@ string_query_constant_p(VALUE self, VALUE string) {
}
/**
+ * :markup: markdown
* call-seq:
- * Prism::StringQuery::method_name?(string) -> bool
+ * method_name?(string) -> bool
*
* Returns true if the string constitutes a valid method name.
*/
@@ -1356,6 +1433,8 @@ Init_prism(void) {
rb_cPrismStringQuery = rb_define_class_under(rb_cPrism, "StringQuery", rb_cObject);
rb_cPrismScope = rb_define_class_under(rb_cPrism, "Scope", rb_cObject);
+ rb_cPrismCurrentVersionError = rb_const_get(rb_cPrism, rb_intern("CurrentVersionError"));
+
// Intern all of the IDs eagerly that we support so that we don't have to do
// it every time we parse.
rb_id_option_command_line = rb_intern_const("command_line");
@@ -1407,5 +1486,4 @@ Init_prism(void) {
// Next, initialize the other APIs.
Init_prism_api_node();
- Init_prism_pack();
}
diff --git a/prism/extension.h b/prism/extension.h
index 506da2fd6f..d0cbc2ff53 100644
--- a/prism/extension.h
+++ b/prism/extension.h
@@ -1,10 +1,11 @@
#ifndef PRISM_EXT_NODE_H
#define PRISM_EXT_NODE_H
-#define EXPECTED_PRISM_VERSION "1.4.0"
+#define EXPECTED_PRISM_VERSION "1.9.0"
#include <ruby.h>
#include <ruby/encoding.h>
+#include <ruby/version.h>
#include "prism.h"
VALUE pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze);
@@ -13,7 +14,6 @@ VALUE pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *
VALUE pm_integer_new(const pm_integer_t *integer);
void Init_prism_api_node(void);
-void Init_prism_pack(void);
RUBY_FUNC_EXPORTED void Init_prism(void);
#endif
diff --git a/prism/util/pm_integer.c b/prism/integer.c
index 4170ecc58d..1b69dbdceb 100644
--- a/prism/util/pm_integer.c
+++ b/prism/integer.c
@@ -1,4 +1,25 @@
-#include "prism/util/pm_integer.h"
+#include "prism/internal/integer.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/buffer.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Free the internal memory of an integer. This memory will only be allocated if
+ * the integer exceeds the size of a single uint32_t.
+ */
+static void
+pm_integer_free(pm_integer_t *integer) {
+ if (integer->values) {
+ xfree(integer->values);
+ }
+}
/**
* Pull out the length and values from the integer, regardless of the form in
@@ -374,7 +395,7 @@ pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, u
}
}
- xfree(bigints);
+ xfree_sized(bigints, bigints_length * sizeof(pm_integer_t));
bigints = next_bigints;
bigints_length = next_length;
}
@@ -383,7 +404,7 @@ pm_integer_convert_base(pm_integer_t *destination, const pm_integer_t *source, u
destination->negative = source->negative;
pm_integer_normalize(destination);
- xfree(bigints);
+ xfree_sized(bigints, bigints_length * sizeof(pm_integer_t));
pm_integer_free(&base);
}
@@ -422,7 +443,7 @@ pm_integer_parse_powof2(pm_integer_t *integer, uint32_t base, const uint8_t *dig
static void
pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t digits_length) {
const size_t batch = 9;
- size_t length = (digits_length + batch - 1) / batch;
+ const size_t length = (digits_length + batch - 1) / batch;
uint32_t *values = (uint32_t *) xcalloc(length, sizeof(uint32_t));
uint32_t value = 0;
@@ -439,7 +460,7 @@ pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t di
// Convert base from 10**9 to 1<<32.
pm_integer_convert_base(integer, &((pm_integer_t) { .length = length, .values = values, .value = 0, .negative = false }), 1000000000, ((uint64_t) 1 << 32));
- xfree(values);
+ xfree_sized(values, length * sizeof(uint32_t));
}
/**
@@ -448,7 +469,8 @@ pm_integer_parse_decimal(pm_integer_t *integer, const uint8_t *digits, size_t di
static void
pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *start, const uint8_t *end) {
// Allocate an array to store digits.
- uint8_t *digits = xmalloc(sizeof(uint8_t) * (size_t) (end - start));
+ const size_t digits_capa = sizeof(uint8_t) * (size_t) (end - start);
+ uint8_t *digits = xmalloc(digits_capa);
size_t digits_length = 0;
for (; start < end; start++) {
@@ -463,7 +485,7 @@ pm_integer_parse_big(pm_integer_t *integer, uint32_t multiplier, const uint8_t *
pm_integer_parse_powof2(integer, multiplier, digits, digits_length);
}
- xfree(digits);
+ xfree_sized(digits, digits_capa);
}
/**
@@ -603,7 +625,7 @@ void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator) {
/**
* Convert an integer to a decimal string.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
if (integer->negative) {
pm_buffer_append_byte(buffer, '-');
@@ -635,7 +657,7 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
}
// Allocate a buffer that we'll copy the decimal digits into.
- size_t digits_length = converted.length * 9;
+ const size_t digits_length = converted.length * 9;
char *digits = xcalloc(digits_length, sizeof(char));
if (digits == NULL) return;
@@ -654,17 +676,6 @@ pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer) {
// Finally, append the string to the buffer and free the digits.
pm_buffer_append_string(buffer, digits + start_offset, digits_length - start_offset);
- xfree(digits);
+ xfree_sized(digits, sizeof(char) * digits_length);
pm_integer_free(&converted);
}
-
-/**
- * Free the internal memory of an integer. This memory will only be allocated if
- * the integer exceeds the size of a single uint32_t.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_integer_free(pm_integer_t *integer) {
- if (integer->values) {
- xfree(integer->values);
- }
-}
diff --git a/prism/integer.h b/prism/integer.h
new file mode 100644
index 0000000000..9285986885
--- /dev/null
+++ b/prism/integer.h
@@ -0,0 +1,41 @@
+/**
+ * @file integer.h
+ *
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_INTEGER_H
+#define PRISM_INTEGER_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A structure that represents an arbitrary-sized integer.
+ */
+typedef struct {
+ /**
+ * The number of allocated values. length is set to 0 if the integer fits
+ * into uint32_t.
+ */
+ size_t length;
+
+ /**
+ * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
+ */
+ uint32_t *values;
+
+ /**
+ * Embedded value for small integer. This value is set to 0 if the value
+ * does not fit into uint32_t.
+ */
+ uint32_t value;
+
+ /**
+ * Whether or not the integer is negative. It is stored this way so that a
+ * zeroed pm_integer_t is always positive zero.
+ */
+ bool negative;
+} pm_integer_t;
+
+#endif
diff --git a/prism/internal/allocator.h b/prism/internal/allocator.h
new file mode 100644
index 0000000000..6c54010dbf
--- /dev/null
+++ b/prism/internal/allocator.h
@@ -0,0 +1,68 @@
+#ifndef PRISM_INTERNAL_ALLOCATOR_H
+#define PRISM_INTERNAL_ALLOCATOR_H
+
+/* If you build Prism with a custom allocator, configure it with
+ * "-D PRISM_XALLOCATOR" to use your own allocator that defines xmalloc,
+ * xrealloc, xcalloc, and xfree.
+ *
+ * For example, your `prism_xallocator.h` file could look like this:
+ *
+ * ```
+ * #ifndef PRISM_XALLOCATOR_H
+ * #define PRISM_XALLOCATOR_H
+ * #define xmalloc my_malloc
+ * #define xrealloc my_realloc
+ * #define xcalloc my_calloc
+ * #define xfree my_free
+ * #define xrealloc_sized my_realloc_sized // (optional)
+ * #define xfree_sized my_free_sized // (optional)
+ * #endif
+ * ```
+ */
+#ifdef PRISM_XALLOCATOR
+ #include "prism_xallocator.h"
+#else
+ #ifndef xmalloc
+ /* The malloc function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define. */
+ #define xmalloc malloc
+ #endif
+
+ #ifndef xrealloc
+ /* The realloc function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define. */
+ #define xrealloc realloc
+ #endif
+
+ #ifndef xcalloc
+ /* The calloc function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define. */
+ #define xcalloc calloc
+ #endif
+
+ #ifndef xfree
+ /* The free function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define. */
+ #define xfree free
+ #endif
+#endif
+
+#ifndef xfree_sized
+ /* The free_sized function that should be used. This can be overridden with
+ * the PRISM_XALLOCATOR define. If not defined, defaults to calling xfree.
+ */
+ #define xfree_sized(p, s) xfree(((void)(s), (p)))
+#endif
+
+#ifndef xrealloc_sized
+ /* The xrealloc_sized function that should be used. This can be overridden
+ * with the PRISM_XALLOCATOR define. If not defined, defaults to calling
+ * xrealloc. */
+ #define xrealloc_sized(p, ns, os) xrealloc((p), ((void)(os), (ns)))
+#endif
+
+#ifdef PRISM_BUILD_DEBUG
+ #include "prism/internal/allocator_debug.h"
+#endif
+
+#endif
diff --git a/prism/internal/allocator_debug.h b/prism/internal/allocator_debug.h
new file mode 100644
index 0000000000..846e96ba2d
--- /dev/null
+++ b/prism/internal/allocator_debug.h
@@ -0,0 +1,88 @@
+#ifndef PRISM_INTERNAL_ALLOCATOR_DEBUG_H
+#define PRISM_INTERNAL_ALLOCATOR_DEBUG_H
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void * /* Debug malloc: prefixes the allocation with a size_t header recording the requested size. */
+pm_allocator_debug_malloc(size_t size) {
+ size_t *memory = xmalloc(size + sizeof(size_t)); /* NOTE(review): result is not NULL-checked and size + sizeof(size_t) is not overflow-checked */
+ memory[0] = size; /* header word, compared against the caller-supplied size by the *_sized variants */
+ return memory + 1; /* callers receive the address just past the header */
+}
+
+static inline void * /* Debug calloc: allocates nmemb * size bytes through the debug malloc and zero-fills them. */
+pm_allocator_debug_calloc(size_t nmemb, size_t size) {
+ size_t total_size = nmemb * size; /* NOTE(review): unchecked multiplication; a wrapped product yields an undersized buffer */
+ void *ptr = pm_allocator_debug_malloc(total_size);
+ memset(ptr, 0, total_size); /* only the user-visible region is cleared; the size header sits before ptr */
+ return ptr;
+}
+
+static inline void * /* Debug realloc: resizes a header-prefixed allocation and rewrites the recorded size. */
+pm_allocator_debug_realloc(void *ptr, size_t size) {
+ if (ptr == NULL) { /* nothing to resize: behave like a fresh debug malloc */
+ return pm_allocator_debug_malloc(size);
+ }
+
+ size_t *memory = (size_t *)ptr;
+ void *raw_memory = memory - 1; /* step back over the size header to the address the underlying allocator returned */
+ memory = (size_t *)xrealloc(raw_memory, size + sizeof(size_t)); /* NOTE(review): result is not NULL-checked before the store below */
+ memory[0] = size; /* record the new requested size */
+ return memory + 1;
+}
+
+static inline void /* Debug free: releases a header-prefixed allocation without checking its recorded size. */
+pm_allocator_debug_free(void *ptr) {
+ if (ptr != NULL) { /* NULL is accepted and ignored */
+ size_t *memory = (size_t *)ptr;
+ xfree(memory - 1); /* free from the header, which is where the underlying allocation starts */
+ }
+}
+
+static inline void /* Debug sized free: aborts if old_size differs from the size recorded at allocation. */
+pm_allocator_debug_free_sized(void *ptr, size_t old_size) {
+ if (ptr != NULL) { /* NULL is accepted and ignored */
+ size_t *memory = (size_t *)ptr;
+ if (old_size != memory[-1]) { /* memory[-1] is the size header stored just before the user pointer */
+ fprintf(stderr, "[BUG] buffer %p was allocated with size %zu but freed with size %zu\n", ptr, memory[-1], old_size); /* %zu: both values are size_t */
+ abort();
+ }
+ xfree_sized(memory - 1, old_size + sizeof(size_t)); /* release from the header, accounting for its extra bytes */
+ }
+}
+
+static inline void * /* Debug sized realloc: aborts if old_size differs from the recorded size, then reallocates. */
+pm_allocator_debug_realloc_sized(void *ptr, size_t size, size_t old_size) {
+ if (ptr == NULL) {
+ if (old_size != 0) { /* a NULL buffer cannot have a non-zero previous size */
+ fprintf(stderr, "[BUG] realloc_sized called with NULL pointer and old size %zu\n", old_size); /* %zu: old_size is size_t */
+ abort();
+ }
+ return pm_allocator_debug_malloc(size);
+ }
+
+ size_t *memory = (size_t *)ptr;
+ if (old_size != memory[-1]) { /* memory[-1] is the size header stored just before the user pointer */
+ fprintf(stderr, "[BUG] buffer %p was allocated with size %zu but realloced with size %zu\n", ptr, memory[-1], old_size); /* %zu: both values are size_t */
+ abort();
+ }
+ return pm_allocator_debug_realloc(ptr, size);
+}
+
+#undef xmalloc
+#undef xrealloc
+#undef xcalloc
+#undef xfree
+#undef xrealloc_sized
+#undef xfree_sized
+
+#define xmalloc pm_allocator_debug_malloc
+#define xrealloc pm_allocator_debug_realloc
+#define xcalloc pm_allocator_debug_calloc
+#define xfree pm_allocator_debug_free
+#define xrealloc_sized pm_allocator_debug_realloc_sized
+#define xfree_sized pm_allocator_debug_free_sized
+
+#endif
diff --git a/prism/internal/arena.h b/prism/internal/arena.h
new file mode 100644
index 0000000000..2e413b42bf
--- /dev/null
+++ b/prism/internal/arena.h
@@ -0,0 +1,108 @@
+#ifndef PRISM_INTERNAL_ARENA_H
+#define PRISM_INTERNAL_ARENA_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/flex_array.h"
+#include "prism/compiler/force_inline.h"
+#include "prism/compiler/inline.h"
+
+#include "prism/arena.h"
+
+#include <stddef.h>
+#include <string.h>
+
+/*
+ * A single block of memory in the arena. Blocks are linked via prev pointers so
+ * they can be freed by walking the chain.
+ */
+typedef struct pm_arena_block {
+ /* The previous block in the chain (for freeing). */
+ struct pm_arena_block *prev;
+
+ /* The total usable bytes in data[]. */
+ size_t capacity;
+
+ /* The number of bytes consumed so far. */
+ size_t used;
+
+ /* The block's data. */
+ char data[PM_FLEX_ARRAY_LENGTH];
+} pm_arena_block_t;
+
+/*
+ * A bump allocator. Allocations are made by bumping a pointer within the
+ * current block. When a block is full, a new block is allocated and linked to
+ * the previous one. All blocks are freed at once by walking the chain.
+ */
+struct pm_arena_t {
+ /* The active block (allocate from here). */
+ pm_arena_block_t *current;
+
+ /* The number of blocks allocated. */
+ size_t block_count;
+};
+
+/*
+ * Free all blocks in the arena. After this call, all pointers returned by
+ * pm_arena_alloc and pm_arena_zalloc are invalid.
+ */
+void pm_arena_cleanup(pm_arena_t *arena);
+
+/*
+ * Ensure the arena has at least `capacity` bytes available in its current
+ * block, allocating a new block if necessary. This allows callers to
+ * pre-size the arena to avoid repeated small block allocations.
+ */
+void pm_arena_reserve(pm_arena_t *arena, size_t capacity);
+
+/*
+ * Slow path for pm_arena_alloc: allocate a new block and return a pointer to
+ * the first `size` bytes. Do not call directly — use pm_arena_alloc instead.
+ */
+void * pm_arena_alloc_slow(pm_arena_t *arena, size_t size);
+
+/*
+ * Allocate memory from the arena. The returned memory is NOT zeroed. This
+ * function is infallible — it aborts on allocation failure.
+ *
+ * The fast path (bump pointer within the current block) is inlined at each
+ * call site. The slow path (new block allocation) is out-of-line.
+ */
+static PRISM_FORCE_INLINE void *
+pm_arena_alloc(pm_arena_t *arena, size_t size, size_t alignment) {
+ if (arena->current != NULL) {
+ size_t used_aligned = (arena->current->used + alignment - 1) & ~(alignment - 1); /* round up; the mask form is only exact when alignment is a power of two */
+ size_t needed = used_aligned + size;
+
+ if (used_aligned >= arena->current->used && needed >= used_aligned && needed <= arena->current->capacity) { /* first two tests reject size_t wraparound, the third checks the block has room */
+ arena->current->used = needed;
+ return arena->current->data + used_aligned;
+ }
+ }
+
+ return pm_arena_alloc_slow(arena, size); /* NOTE(review): alignment is not forwarded; presumably a fresh block starts suitably aligned -- confirm in arena.c */
+}
+
+/*
+ * Allocate zero-initialized memory from the arena. This function is infallible
+ * — it aborts on allocation failure.
+ */
+static PRISM_INLINE void *
+pm_arena_zalloc(pm_arena_t *arena, size_t size, size_t alignment) {
+ void *ptr = pm_arena_alloc(arena, size, alignment);
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/*
+ * Allocate memory from the arena and copy the given data into it. This is a
+ * convenience wrapper around pm_arena_alloc + memcpy.
+ */
+static PRISM_INLINE void *
+pm_arena_memdup(pm_arena_t *arena, const void *src, size_t size, size_t alignment) {
+ void *dst = pm_arena_alloc(arena, size, alignment);
+ memcpy(dst, src, size);
+ return dst;
+}
+
+#endif
diff --git a/prism/internal/bit.h b/prism/internal/bit.h
new file mode 100644
index 0000000000..b0111a4c2c
--- /dev/null
+++ b/prism/internal/bit.h
@@ -0,0 +1,42 @@
+#ifndef PRISM_INTERNAL_BIT_H
+#define PRISM_INTERNAL_BIT_H
+
+#include "prism/compiler/inline.h"
+
+/*
+ * Count trailing zero bits in a 64-bit value. Used by SWAR identifier scanning
+ * to find the first non-matching byte in a word.
+ *
+ * Precondition: v must be nonzero. The result is undefined when v == 0
+ * (matching the behavior of __builtin_ctzll and _BitScanForward64).
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#define pm_ctzll(v) ((unsigned) __builtin_ctzll(v))
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) /* _BitScanForward64 exists only on 64-bit MSVC targets */
+#include <intrin.h>
+#include <stdint.h>
+
+static PRISM_INLINE unsigned
+pm_ctzll(uint64_t v) {
+ unsigned long index; /* left unset by the intrinsic when v == 0, hence the nonzero precondition */
+ _BitScanForward64(&index, v);
+ return (unsigned) index;
+}
+#else
+#include <stdint.h>
+
+static PRISM_INLINE unsigned
+pm_ctzll(uint64_t v) {
+ unsigned c = 0;
+ v &= (~v + 1); /* isolate the lowest set bit; unsigned negation avoids signed overflow when only bit 63 is set */
+ if (v & 0x00000000FFFFFFFFULL) c += 0; else c += 32; /* each mask halves the range the isolated bit can be in */
+ if (v & 0x0000FFFF0000FFFFULL) c += 0; else c += 16;
+ if (v & 0x00FF00FF00FF00FFULL) c += 0; else c += 8;
+ if (v & 0x0F0F0F0F0F0F0F0FULL) c += 0; else c += 4;
+ if (v & 0x3333333333333333ULL) c += 0; else c += 2;
+ if (v & 0x5555555555555555ULL) c += 0; else c += 1;
+ return c;
+}
+#endif
+
+#endif
diff --git a/prism/internal/buffer.h b/prism/internal/buffer.h
new file mode 100644
index 0000000000..a849bbf8e6
--- /dev/null
+++ b/prism/internal/buffer.h
@@ -0,0 +1,91 @@
+#ifndef PRISM_INTERNAL_BUFFER_H
+#define PRISM_INTERNAL_BUFFER_H
+
+#include "prism/compiler/format.h"
+
+#include "prism/buffer.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/*
+ * A simple memory buffer that stores data in a contiguous block of memory.
+ */
+struct pm_buffer_t {
+ /* The length of the buffer in bytes. */
+ size_t length;
+
+ /* The capacity of the buffer in bytes that has been allocated. */
+ size_t capacity;
+
+ /* A pointer to the start of the buffer. */
+ char *value;
+};
+
+/* Initialize a pm_buffer_t with the given capacity. */
+void pm_buffer_init(pm_buffer_t *buffer, size_t capacity);
+
+/* Free the memory held by the buffer. */
+void pm_buffer_cleanup(pm_buffer_t *buffer);
+
+/* Append the given amount of space as zeroes to the buffer. */
+void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
+
+/* Append a formatted string to the buffer. */
+void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
+
+/* Append a string to the buffer. */
+void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/* Append a list of bytes to the buffer. */
+void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
+
+/* Append a single byte to the buffer. */
+void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
+
+/* Append a 32-bit unsigned integer to the buffer as a variable-length integer. */
+void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
+
+/* Append a 32-bit signed integer to the buffer as a variable-length integer. */
+void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
+
+/* Append a double to the buffer. */
+void pm_buffer_append_double(pm_buffer_t *buffer, double value);
+
+/* Append a unicode codepoint to the buffer. */
+bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
+
+/*
+ * The different types of escaping that can be performed by the buffer when
+ * appending a slice of Ruby source code.
+ */
+typedef enum {
+ PM_BUFFER_ESCAPING_RUBY,
+ PM_BUFFER_ESCAPING_JSON
+} pm_buffer_escaping_t;
+
+/* Append a slice of source code to the buffer. */
+void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
+
+/* Prepend the given string to the buffer. */
+void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
+
+/* Concatenate one buffer onto another. */
+void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
+
+/*
+ * Clear the buffer by reducing its size to 0. This does not free the allocated
+ * memory, but it does allow the buffer to be reused.
+ */
+void pm_buffer_clear(pm_buffer_t *buffer);
+
+/* Strip the whitespace from the end of the buffer. */
+void pm_buffer_rstrip(pm_buffer_t *buffer);
+
+/* Checks if the buffer includes the given value. */
+size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
+
+/* Insert the given string into the buffer at the given index. */
+void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
+
+#endif
diff --git a/prism/internal/char.h b/prism/internal/char.h
new file mode 100644
index 0000000000..9a58fba8c5
--- /dev/null
+++ b/prism/internal/char.h
@@ -0,0 +1,139 @@
+#ifndef PRISM_INTERNAL_CHAR_H
+#define PRISM_INTERNAL_CHAR_H
+
+#include "prism/compiler/force_inline.h"
+
+#include "prism/arena.h"
+#include "prism/line_offset_list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Bit flag for whitespace characters in pm_byte_table. */
+#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
+
+/* Bit flag for inline whitespace characters in pm_byte_table. */
+#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
+
+/*
+ * A lookup table for classifying bytes. Each entry is a bitfield of
+ * PRISM_CHAR_BIT_* flags. Defined in char.c.
+ */
+extern const uint8_t pm_byte_table[256];
+
+/* Returns true if the given character is a whitespace character. */
+static PRISM_FORCE_INLINE bool
+pm_char_is_whitespace(const uint8_t b) {
+ return (pm_byte_table[b] & PRISM_CHAR_BIT_WHITESPACE) != 0;
+}
+
+/* Returns true if the given character is an inline whitespace character. */
+static PRISM_FORCE_INLINE bool
+pm_char_is_inline_whitespace(const uint8_t b) {
+ return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0;
+}
+
+/*
+ * Returns the number of characters at the start of the string that are inline
+ * whitespace (space/tab). Scans the byte table directly for use in hot paths.
+ */
+static PRISM_FORCE_INLINE size_t
+pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+ if (length <= 0) return 0;
+ size_t size = 0;
+ size_t maximum = (size_t) length;
+ while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_INLINE_WHITESPACE)) size++;
+ return size;
+}
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * whitespace. Disallows searching past the given maximum number of characters.
+ */
+size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * whitespace while also tracking the location of each newline. Disallows
+ * searching past the given maximum number of characters.
+ */
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset);
+
+/*
+ * Returns the number of characters at the start of the string that are decimal
+ * digits. Disallows searching past the given maximum number of characters.
+ */
+size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits. Disallows searching past the given maximum number of
+ * characters.
+ */
+size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are octal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/*
+ * Returns the number of characters at the start of the string that are decimal
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/*
+ * Returns the number of characters at the start of the string that are
+ * hexadecimal digits or underscores. Disallows searching past the given maximum
+ * number of characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+/*
+ * Returns the number of characters at the start of the string that are regexp
+ * options. Disallows searching past the given maximum number of characters.
+ */
+size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
+
+/*
+ * Returns the number of characters at the start of the string that are binary
+ * digits or underscores. Disallows searching past the given maximum number of
+ * characters.
+ *
+ * If multiple underscores are found in a row or if an underscore is
+ * found at the end of the number, then the invalid pointer is set to the index
+ * of the first invalid underscore.
+ */
+size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+
+/* Returns true if the given character is a binary digit. */
+bool pm_char_is_binary_digit(const uint8_t b);
+
+/* Returns true if the given character is an octal digit. */
+bool pm_char_is_octal_digit(const uint8_t b);
+
+/* Returns true if the given character is a decimal digit. */
+bool pm_char_is_decimal_digit(const uint8_t b);
+
+/* Returns true if the given character is a hexadecimal digit. */
+bool pm_char_is_hexadecimal_digit(const uint8_t b);
+
+#endif
diff --git a/prism/internal/comments.h b/prism/internal/comments.h
new file mode 100644
index 0000000000..bb3039a658
--- /dev/null
+++ b/prism/internal/comments.h
@@ -0,0 +1,20 @@
+#ifndef PRISM_INTERNAL_COMMENTS_H
+#define PRISM_INTERNAL_COMMENTS_H
+
+#include "prism/comments.h"
+
+#include "prism/internal/list.h"
+
+/* A comment found while parsing. */
+struct pm_comment_t {
+ /* The embedded base node. */
+ pm_list_node_t node;
+
+ /* The location of the comment in the source. */
+ pm_location_t location;
+
+ /* The type of the comment. */
+ pm_comment_type_t type;
+};
+
+#endif
diff --git a/prism/internal/constant_pool.h b/prism/internal/constant_pool.h
new file mode 100644
index 0000000000..fa2be783f5
--- /dev/null
+++ b/prism/internal/constant_pool.h
@@ -0,0 +1,117 @@
+#ifndef PRISM_INTERNAL_CONSTANT_POOL_H
+#define PRISM_INTERNAL_CONSTANT_POOL_H
+
+#include "prism/constant_pool.h"
+
+#include "prism/arena.h"
+
+#include <stdbool.h>
+
+/* A constant in the pool which effectively stores a string. */
+struct pm_constant_t {
+ /* A pointer to the start of the string. */
+ const uint8_t *start;
+
+ /* The length of the string. */
+ size_t length;
+};
+
+/*
+ * The type of bucket in the constant pool hash map. This determines how the
+ * bucket should be freed.
+ */
+typedef unsigned int pm_constant_pool_bucket_type_t;
+
+/* By default, each constant is a slice of the source. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
+
+/* An owned constant is one for which memory has been allocated. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
+
+/* A constant constant is known at compile time. */
+static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
+
+/* A bucket in the hash map. */
+typedef struct {
+ /* The incremental ID used for indexing back into the pool. */
+ unsigned int id: 30;
+
+ /* The type of the bucket, which determines how to free it. */
+ pm_constant_pool_bucket_type_t type: 2;
+
+ /* The hash of the bucket. */
+ uint32_t hash;
+
+ /*
+ * A pointer to the start of the string, stored directly in the bucket to
+ * avoid a pointer chase to the constants array during probing.
+ */
+ const uint8_t *start;
+
+ /* The length of the string. */
+ size_t length;
+} pm_constant_pool_bucket_t;
+
+/* The overall constant pool, which stores constants found while parsing. */
+struct pm_constant_pool_t {
+ /* The buckets in the hash map. */
+ pm_constant_pool_bucket_t *buckets;
+
+ /* The constants that are stored in the buckets. */
+ pm_constant_t *constants;
+
+ /* The number of buckets in the hash map. */
+ uint32_t size;
+
+ /* The number of buckets that have been allocated in the hash map. */
+ uint32_t capacity;
+};
+
+/*
+ * When we allocate constants into the pool, we reserve 0 to mean that the slot
+ * is not yet filled. This constant is reused in other places to indicate the
+ * lack of a constant id.
+ */
+#define PM_CONSTANT_ID_UNSET 0
+
+/* Initialize a list of constant ids with a given capacity. */
+void pm_constant_id_list_init_capacity(pm_arena_t *arena, pm_constant_id_list_t *list, size_t capacity);
+
+/* Insert a constant id into a list of constant ids at the specified index. */
+void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
+
+/* Checks if the current constant id list includes the given constant id. */
+bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
+
+/* Initialize a new constant pool with a given capacity. */
+void pm_constant_pool_init(pm_arena_t *arena, pm_constant_pool_t *pool, uint32_t capacity);
+
+/* Return a pointer to the constant indicated by the given constant id. */
+pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
+
+/*
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ */
+pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/*
+ * Insert a constant into a constant pool that is a slice of a source string.
+ * Returns the id of the constant, or 0 if any potential calls to resize fail.
+ */
+pm_constant_id_t pm_constant_pool_insert_shared(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+/*
+ * Insert a constant into a constant pool from memory that is now owned by the
+ * constant pool. Returns the id of the constant, or 0 if any potential calls to
+ * resize fail.
+ */
+pm_constant_id_t pm_constant_pool_insert_owned(pm_arena_t *arena, pm_constant_pool_t *pool, uint8_t *start, size_t length);
+
+/*
+ * Insert a constant into a constant pool from memory that is constant. Returns
+ * the id of the constant, or 0 if any potential calls to resize fail.
+ */
+pm_constant_id_t pm_constant_pool_insert_constant(pm_arena_t *arena, pm_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+#endif
diff --git a/prism/encoding.h b/prism/internal/encoding.h
index 5f7724821f..62392ef970 100644
--- a/prism/encoding.h
+++ b/prism/internal/encoding.h
@@ -1,128 +1,95 @@
-/**
- * @file encoding.h
- *
- * The encoding interface and implementations used by the parser.
- */
-#ifndef PRISM_ENCODING_H
-#define PRISM_ENCODING_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_strncasecmp.h"
+#ifndef PRISM_INTERNAL_ENCODING_H
+#define PRISM_INTERNAL_ENCODING_H
-#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-/**
+/*
* This struct defines the functions necessary to implement the encoding
* interface so we can determine how many bytes the subsequent character takes.
* Each callback should return the number of bytes, or 0 if the next bytes are
* invalid for the encoding and type.
*/
typedef struct {
- /**
+ /*
* Return the number of bytes that the next character takes if it is valid
* in the encoding. Does not read more than n bytes. It is assumed that n is
* at least 1.
*/
size_t (*char_width)(const uint8_t *b, ptrdiff_t n);
- /**
+ /*
* Return the number of bytes that the next character takes if it is valid
* in the encoding and is alphabetical. Does not read more than n bytes. It
* is assumed that n is at least 1.
*/
size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);
- /**
+ /*
* Return the number of bytes that the next character takes if it is valid
* in the encoding and is alphanumeric. Does not read more than n bytes. It
* is assumed that n is at least 1.
*/
size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
- /**
+ /*
* Return true if the next character is valid in the encoding and is an
* uppercase character. Does not read more than n bytes. It is assumed that
* n is at least 1.
*/
bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
- /**
+ /*
* The name of the encoding. This should correspond to a value that can be
* passed to Encoding.find in Ruby.
*/
const char *name;
- /**
- * Return true if the encoding is a multibyte encoding.
- */
+ /* True if the encoding is a multibyte encoding. */
bool multibyte;
} pm_encoding_t;
-/**
+/*
* All of the lookup tables use the first bit of each embedded byte to indicate
* whether the codepoint is alphabetical.
*/
#define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0
-/**
+/*
* All of the lookup tables use the second bit of each embedded byte to indicate
* whether the codepoint is alphanumeric.
*/
#define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1
-/**
+/*
* All of the lookup tables use the third bit of each embedded byte to indicate
* whether the codepoint is uppercase.
*/
#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
-/**
- * Return the size of the next character in the UTF-8 encoding.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns The number of bytes that the next character takes if it is valid in
- * the encoding, or 0 if it is not.
- */
+/* Return the size of the next character in the UTF-8 encoding. */
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
-/**
+/*
* Return the size of the next character in the UTF-8 encoding if it is an
* alphabetical character.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns The number of bytes that the next character takes if it is valid in
- * the encoding, or 0 if it is not.
*/
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
-/**
+/*
* Return the size of the next character in the UTF-8 encoding if it is an
* alphanumeric character.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns The number of bytes that the next character takes if it is valid in
- * the encoding, or 0 if it is not.
*/
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
-/**
+/*
* Return true if the next character in the UTF-8 encoding is an uppercase
* character.
- *
- * @param b The bytes to read.
- * @param n The number of bytes that can be read.
- * @returns True if the next character is valid in the encoding and is an
- * uppercase character, or false if it is not.
*/
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
-/**
+/*
* This lookup table is referenced in both the UTF-8 encoding file and the
* parser directly in order to speed up the default encoding processing. It is
* used to indicate whether a character is alphabetical, alphanumeric, or
@@ -130,9 +97,7 @@ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
*/
extern const uint8_t pm_encoding_unicode_table[256];
-/**
- * These are all of the encodings that prism supports.
- */
+/* These are all of the encodings that prism supports. */
typedef enum {
PM_ENCODING_UTF_8 = 0,
PM_ENCODING_US_ASCII,
@@ -140,8 +105,8 @@ typedef enum {
PM_ENCODING_EUC_JP,
PM_ENCODING_WINDOWS_31J,
-// We optionally support excluding the full set of encodings to only support the
-// minimum necessary to process Ruby code without encoding comments.
+/* We optionally support excluding the full set of encodings to only support the
+ * minimum necessary to process Ruby code without encoding comments. */
#ifndef PRISM_ENCODING_EXCLUDE_FULL
PM_ENCODING_BIG5,
PM_ENCODING_BIG5_HKSCS,
@@ -233,50 +198,44 @@ typedef enum {
PM_ENCODING_MAXIMUM
} pm_encoding_type_t;
-/**
- * This is the table of all of the encodings that prism supports.
- */
+/* This is the table of all of the encodings that prism supports. */
extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
-/**
+/*
* This is the default UTF-8 encoding. We need a reference to it to quickly
* create parsers.
*/
#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
-/**
+/*
* This is the US-ASCII encoding. We need a reference to it to be able to
* compare against it when a string is being created because it could possibly
* need to fall back to ASCII-8BIT.
*/
#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
-/**
+/*
* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
* can compare against it because invalid multibyte characters are not a thing
* in this encoding. It is also needed for handling Regexp encoding flags.
*/
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
-/**
+/*
* This is the EUC-JP encoding. We need a reference to it to quickly process
* regular expression modifiers.
*/
#define PM_ENCODING_EUC_JP_ENTRY (&pm_encodings[PM_ENCODING_EUC_JP])
-/**
+/*
* This is the Windows-31J encoding. We need a reference to it to quickly
* process regular expression modifiers.
*/
#define PM_ENCODING_WINDOWS_31J_ENTRY (&pm_encodings[PM_ENCODING_WINDOWS_31J])
-/**
+/*
* Parse the given name of an encoding and return a pointer to the corresponding
* encoding struct if one can be found, otherwise return NULL.
- *
- * @param start A pointer to the first byte of the name.
- * @param end A pointer to the last byte of the name.
- * @returns A pointer to the encoding struct if one is found, otherwise NULL.
*/
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
diff --git a/prism/internal/integer.h b/prism/internal/integer.h
new file mode 100644
index 0000000000..7c9767e323
--- /dev/null
+++ b/prism/internal/integer.h
@@ -0,0 +1,68 @@
+/*
+ * This module provides functions for working with arbitrary-sized integers.
+ */
+#ifndef PRISM_INTERNAL_INTEGER_H
+#define PRISM_INTERNAL_INTEGER_H
+
+#include "prism/buffer.h"
+#include "prism/integer.h"
+
+#include <stdint.h>
+
+/*
+ * An enum controlling the base of an integer. It is expected that the base is
+ * already known before parsing the integer, even though it could be derived
+ * from the string itself.
+ */
+typedef enum {
+ /* The default decimal base, with no prefix. Leading 0s will be ignored. */
+ PM_INTEGER_BASE_DEFAULT,
+
+ /* The binary base, indicated by a 0b or 0B prefix. */
+ PM_INTEGER_BASE_BINARY,
+
+ /* The octal base, indicated by a 0, 0o, or 0O prefix. */
+ PM_INTEGER_BASE_OCTAL,
+
+ /* The decimal base, indicated by a 0d, 0D, or empty prefix. */
+ PM_INTEGER_BASE_DECIMAL,
+
+ /* The hexadecimal base, indicated by a 0x or 0X prefix. */
+ PM_INTEGER_BASE_HEXADECIMAL,
+
+ /*
+ * An unknown base, in which case pm_integer_parse will derive it based on
+ * the content of the string. This is less efficient and does more
+ * comparisons, so if callers know the base ahead of time, they should use
+ * that instead.
+ */
+ PM_INTEGER_BASE_UNKNOWN
+} pm_integer_base_t;
+
+/*
+ * Parse an integer from a string. This assumes that the format of the integer
+ * has already been validated, as internal validation checks are not performed
+ * here.
+ */
+void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
+
+/*
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
+/*
+ * Reduce a ratio of integers to its simplest form.
+ *
+ * If either the numerator or denominator does not fit into a 32-bit integer,
+ * then this function is a no-op. In the future, we may consider reducing even
+ * the larger numbers, but for now we're going to keep it simple.
+ */
+void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
+
+/* Convert an integer to a decimal string. */
+void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
+
+#endif
diff --git a/prism/internal/isinf.h b/prism/internal/isinf.h
new file mode 100644
index 0000000000..41c160f56d
--- /dev/null
+++ b/prism/internal/isinf.h
@@ -0,0 +1,16 @@
+#ifndef PRISM_INTERNAL_ISINF_H
+#define PRISM_INTERNAL_ISINF_H
+
+/*
+ * isinf on POSIX systems accepts a float, a double, or a long double. But mingw
+ * didn't provide an isinf macro, only an isinf function that only accepts
+ * floats, so we need to use _finite instead.
+ */
+#ifdef __MINGW64__
+ #include <float.h>
+ #define PRISM_ISINF(x) (!_finite(x))
+#else
+ #define PRISM_ISINF(x) isinf(x)
+#endif
+
+#endif
diff --git a/prism/internal/line_offset_list.h b/prism/internal/line_offset_list.h
new file mode 100644
index 0000000000..dac9f7052e
--- /dev/null
+++ b/prism/internal/line_offset_list.h
@@ -0,0 +1,34 @@
+#ifndef PRISM_INTERNAL_LINE_OFFSET_LIST_H
+#define PRISM_INTERNAL_LINE_OFFSET_LIST_H
+
+#include "prism/compiler/force_inline.h"
+
+#include "prism/arena.h"
+#include "prism/line_offset_list.h"
+
+/* Initialize a new line offset list with the given capacity. */
+void pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity);
+
+/* Clear out the offsets that have been appended to the list. */
+void pm_line_offset_list_clear(pm_line_offset_list_t *list);
+
+/* Append a new offset to the list (slow path with resize). */
+void pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor);
+
+/* Append a new offset to the list. */
+static PRISM_FORCE_INLINE void
+pm_line_offset_list_append(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
+ if (list->size < list->capacity) {
+ list->offsets[list->size++] = cursor;
+ } else {
+ pm_line_offset_list_append_slow(arena, list, cursor);
+ }
+}
+
+/*
+ * Returns the line of the given offset. If the offset is not in the list, the
+ * line of the closest offset less than the given offset is returned.
+ */
+int32_t pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line);
+
+#endif
diff --git a/prism/util/pm_list.h b/prism/internal/list.h
index 3512dee979..0ab59ef32a 100644
--- a/prism/util/pm_list.h
+++ b/prism/internal/list.h
@@ -1,19 +1,9 @@
-/**
- * @file pm_list.h
- *
- * An abstract linked list.
- */
-#ifndef PRISM_LIST_H
-#define PRISM_LIST_H
+#ifndef PRISM_INTERNAL_LIST_H
+#define PRISM_INTERNAL_LIST_H
-#include "prism/defines.h"
-
-#include <stdbool.h>
#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-/**
+/*
* This struct represents an abstract linked list that provides common
* functionality. It is meant to be used any time a linked list is necessary to
* store data.
@@ -44,54 +34,29 @@
* iteration and appending of new nodes.
*/
typedef struct pm_list_node {
- /** A pointer to the next node in the list. */
+ /* A pointer to the next node in the list. */
struct pm_list_node *next;
} pm_list_node_t;
-/**
+/*
* This represents the overall linked list. It keeps a pointer to the head and
* tail so that iteration is easy and pushing new nodes is easy.
*/
typedef struct {
- /** The size of the list. */
+ /* The size of the list. */
size_t size;
- /** A pointer to the head of the list. */
+ /* A pointer to the head of the list. */
pm_list_node_t *head;
- /** A pointer to the tail of the list. */
+ /* A pointer to the tail of the list. */
pm_list_node_t *tail;
} pm_list_t;
-/**
- * Returns true if the given list is empty.
- *
- * @param list The list to check.
- * @return True if the given list is empty, otherwise false.
- */
-PRISM_EXPORTED_FUNCTION bool pm_list_empty_p(pm_list_t *list);
+/* Returns the size of the list. */
+size_t pm_list_size(pm_list_t *list);
-/**
- * Returns the size of the list.
- *
- * @param list The list to check.
- * @return The size of the list.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_list_size(pm_list_t *list);
-
-/**
- * Append a node to the given list.
- *
- * @param list The list to append to.
- * @param node The node to append.
- */
+/* Append a node to the given list. */
void pm_list_append(pm_list_t *list, pm_list_node_t *node);
-/**
- * Deallocate the internal state of the given list.
- *
- * @param list The list to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_list_free(pm_list_t *list);
-
#endif
diff --git a/prism/internal/magic_comments.h b/prism/internal/magic_comments.h
new file mode 100644
index 0000000000..72a581c5d7
--- /dev/null
+++ b/prism/internal/magic_comments.h
@@ -0,0 +1,23 @@
+#ifndef PRISM_INTERNAL_MAGIC_COMMENTS_H
+#define PRISM_INTERNAL_MAGIC_COMMENTS_H
+
+#include "prism/magic_comments.h"
+
+#include "prism/internal/list.h"
+
+/*
+ * This is a node in the linked list of magic comments that we've found while
+ * parsing.
+ */
+struct pm_magic_comment_t {
+ /* The embedded base node. */
+ pm_list_node_t node;
+
+ /* The key of the magic comment. */
+ pm_location_t key;
+
+ /* The value of the magic comment. */
+ pm_location_t value;
+};
+
+#endif
diff --git a/prism/internal/memchr.h b/prism/internal/memchr.h
new file mode 100644
index 0000000000..6f6b0bca30
--- /dev/null
+++ b/prism/internal/memchr.h
@@ -0,0 +1,15 @@
+#ifndef PRISM_INTERNAL_MEMCHR_H
+#define PRISM_INTERNAL_MEMCHR_H
+
+#include "prism/internal/encoding.h"
+
+#include <stddef.h>
+
+/*
+ * We need to roll our own memchr to handle cases where the encoding changes and
+ * we need to search for a character in a buffer that could be the trailing byte
+ * of a multibyte character.
+ */
+const void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
+
+#endif
diff --git a/prism/internal/node.h b/prism/internal/node.h
new file mode 100644
index 0000000000..ca6d5616d7
--- /dev/null
+++ b/prism/internal/node.h
@@ -0,0 +1,32 @@
+#ifndef PRISM_INTERNAL_NODE_H
+#define PRISM_INTERNAL_NODE_H
+
+#include "prism/node.h"
+
+#include "prism/compiler/force_inline.h"
+
+#include "prism/arena.h"
+
+/*
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly — use pm_node_list_append instead.
+ */
+void pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
+
+/* Append a new node onto the end of the node list. */
+static PRISM_FORCE_INLINE void
+pm_node_list_append(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+ if (list->size < list->capacity) {
+ list->nodes[list->size++] = node;
+ } else {
+ pm_node_list_append_slow(arena, list, node);
+ }
+}
+
+/* Prepend a new node onto the beginning of the node list. */
+void pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node);
+
+/* Concatenate the given node list onto the end of the other node list. */
+void pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other);
+
+#endif
diff --git a/prism/internal/options.h b/prism/internal/options.h
new file mode 100644
index 0000000000..7e37742a8b
--- /dev/null
+++ b/prism/internal/options.h
@@ -0,0 +1,212 @@
+#ifndef PRISM_INTERNAL_OPTIONS_H
+#define PRISM_INTERNAL_OPTIONS_H
+
+#include "prism/options.h"
+
+/* A scope of locals surrounding the code that is being parsed. */
+struct pm_options_scope_t {
+ /* The number of locals in the scope. */
+ size_t locals_count;
+
+ /* The names of the locals in the scope. */
+ pm_string_t *locals;
+
+ /* Flags for the set of forwarding parameters in this scope. */
+ uint8_t forwarding;
+};
+
+/*
+ * The version of Ruby syntax that we should be parsing with. This is used to
+ * allow consumers to specify which behavior they want in case they need to
+ * parse in the same way as a specific version of CRuby would have.
+ */
+typedef enum {
+ /*
+ * If an explicit version is not provided, the current version of prism will
+ * be used.
+ */
+ PM_OPTIONS_VERSION_UNSET = 0,
+
+ /* The vendored version of prism in CRuby 3.3.x. */
+ PM_OPTIONS_VERSION_CRUBY_3_3 = 1,
+
+ /* The vendored version of prism in CRuby 3.4.x. */
+ PM_OPTIONS_VERSION_CRUBY_3_4 = 2,
+
+ /* The CRuby 3.5 name; same value as PM_OPTIONS_VERSION_CRUBY_4_0. */
+ PM_OPTIONS_VERSION_CRUBY_3_5 = 3,
+
+ /* The vendored version of prism in CRuby 4.0.x. */
+ PM_OPTIONS_VERSION_CRUBY_4_0 = 3,
+
+ /* The vendored version of prism in CRuby 4.1.x. */
+ PM_OPTIONS_VERSION_CRUBY_4_1 = 4,
+
+ /* The current version of prism. */
+ PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_4_1
+} pm_options_version_t;
+
+/* The options that can be passed to the parser. */
+struct pm_options_t {
+ /*
+ * The callback to call when additional switches are found in a shebang
+ * comment.
+ */
+ pm_options_shebang_callback_t shebang_callback;
+
+ /*
+ * Any additional data that should be passed along to the shebang callback
+ * if one was set.
+ */
+ void *shebang_callback_data;
+
+ /* The name of the file that is currently being parsed. */
+ pm_string_t filepath;
+
+ /*
+ * The line within the file that the parse starts on. This value is
+ * 1-indexed.
+ */
+ int32_t line;
+
+ /*
+ * The name of the encoding that the source file is in. Note that this must
+ * correspond to a name that can be found with Encoding.find in Ruby.
+ */
+ pm_string_t encoding;
+
+ /* The number of scopes surrounding the code that is being parsed. */
+ size_t scopes_count;
+
+ /*
+ * The scopes surrounding the code that is being parsed. For most parses
+ * this will be NULL, but for evals it will be the locals that are in scope
+ * surrounding the eval. Scopes are ordered from the outermost scope to the
+ * innermost one.
+ */
+ pm_options_scope_t *scopes;
+
+ /*
+ * The version of prism that we should be parsing with. This is used to
+ * allow consumers to specify which behavior they want in case they need to
+ * parse exactly as a specific version of CRuby.
+ */
+ pm_options_version_t version;
+
+ /* A bitset of the various options that were set on the command line. */
+ uint8_t command_line;
+
+ /*
+ * Whether or not the frozen string literal option has been set.
+ * May be:
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+ */
+ int8_t frozen_string_literal;
+
+ /*
+ * Whether or not the encoding magic comments should be respected. This is a
+ * niche use-case where you want to parse a file with a specific encoding
+ * but ignore any encoding magic comments at the top of the file.
+ */
+ bool encoding_locked;
+
+ /*
+ * When the file being parsed is the main script, the shebang will be
+ * considered for command-line flags (or for implicit -x). The caller needs
+ * to pass this information to the parser so that it can behave correctly.
+ */
+ bool main_script;
+
+ /*
+ * When the file being parsed is considered a "partial" script, jumps will
+ * not be marked as errors if they are not contained within loops/blocks.
+ * This is used in the case that you're parsing a script that you know will
+ * be embedded inside another script later, but you do not have that context
+ * yet. For example, when parsing an ERB template that will be evaluated
+ * inside another script.
+ */
+ bool partial_script;
+
+ /*
+ * Whether or not the parser should freeze the nodes that it creates. This
+ * makes it possible to have a deeply frozen AST that is safe to share
+ * between concurrency primitives.
+ */
+ bool freeze;
+};
+
+/* Free the internal memory associated with the options. */
+void pm_options_cleanup(pm_options_t *options);
+
+/*
+ * Deserialize an options struct from the given binary string. This is used to
+ * pass options to the parser from an FFI call so that consumers of the library
+ * from an FFI perspective don't have to worry about the structure of our
+ * options structs. Since the source of these calls will be from Ruby
+ * implementation internals we assume it is from a trusted source.
+ *
+ * `data` is assumed to be a valid pointer pointing to well-formed data. The
+ * layout of this data should be the same every time, and is described below:
+ *
+ * | # bytes | field |
+ * | ------- | -------------------------- |
+ * | `4` | the length of the filepath |
+ * | ... | the filepath bytes |
+ * | `4` | the line number |
+ * | `4` | the length of the encoding |
+ * | ... | the encoding bytes |
+ * | `1` | frozen string literal |
+ * | `1` | -p command line option |
+ * | `1` | -n command line option |
+ * | `1` | -l command line option |
+ * | `1` | -a command line option |
+ * | `1` | the version |
+ * | `1` | encoding locked |
+ * | `1` | main script |
+ * | `1` | partial script |
+ * | `1` | freeze |
+ * | `4` | the number of scopes |
+ * | ... | the scopes |
+ *
+ * The version field is an enum, so it should be one of the following values:
+ *
+ * | value | version |
+ * | ----- | ------------------------- |
+ * | `0` | use the latest version of prism |
+ * | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
+ * | `2` | use the version of prism that is vendored in CRuby 3.4.0 |
+ * | `3` | use the version of prism that is vendored in CRuby 4.0.0 |
+ * | `4` | use the version of prism that is vendored in CRuby 4.1.0 |
+ *
+ * Each scope is laid out as follows:
+ *
+ * | # bytes | field |
+ * | ------- | -------------------------- |
+ * | `4` | the number of locals |
+ * | `1` | the forwarding flags |
+ * | ... | the locals |
+ *
+ * Each local is laid out as follows:
+ *
+ * | # bytes | field |
+ * | ------- | -------------------------- |
+ * | `4` | the length of the local |
+ * | ... | the local bytes |
+ *
+ * Some additional things to note about this layout:
+ *
+ * * The filepath can have a length of 0, in which case we'll consider it an
+ * empty string.
+ * * The line number should be 1-indexed.
+ * * The encoding can have a length of 0, in which case we'll use the default
+ * encoding (UTF-8). If it's not 0, it should correspond to a name of an
+ * encoding that can be passed to `Encoding.find` in Ruby.
+ * * The frozen string literal, encoding locked, main script, and partial script
+ * fields are booleans, so their values should be either 0 or 1.
+ * * The number of scopes can be 0.
+ */
+void pm_options_read(pm_options_t *options, const char *data);
+
+#endif
diff --git a/prism/internal/parser.h b/prism/internal/parser.h
new file mode 100644
index 0000000000..4320cf4029
--- /dev/null
+++ b/prism/internal/parser.h
@@ -0,0 +1,958 @@
+#ifndef PRISM_INTERNAL_PARSER_H
+#define PRISM_INTERNAL_PARSER_H
+
+#include "prism/compiler/accel.h"
+
+#include "prism/internal/arena.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+#include "prism/internal/options.h"
+#include "prism/internal/static_literals.h"
+#include "prism/internal/strpbrk.h"
+
+#include "prism/ast.h"
+#include "prism/line_offset_list.h"
+#include "prism/parser.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * This enum provides various bits that represent different kinds of states that
+ * the lexer can track. This is used to determine which kind of token to return
+ * based on the context of the parser.
+ */
+typedef enum {
+ PM_LEX_STATE_BIT_BEG,
+ PM_LEX_STATE_BIT_END,
+ PM_LEX_STATE_BIT_ENDARG,
+ PM_LEX_STATE_BIT_ENDFN,
+ PM_LEX_STATE_BIT_ARG,
+ PM_LEX_STATE_BIT_CMDARG,
+ PM_LEX_STATE_BIT_MID,
+ PM_LEX_STATE_BIT_FNAME,
+ PM_LEX_STATE_BIT_DOT,
+ PM_LEX_STATE_BIT_CLASS,
+ PM_LEX_STATE_BIT_LABEL,
+ PM_LEX_STATE_BIT_LABELED,
+ PM_LEX_STATE_BIT_FITEM
+} pm_lex_state_bit_t;
+
+/*
+ * This enum combines the various bits from the above enum into individual
+ * values that represent the various states of the lexer.
+ */
+typedef enum {
+ PM_LEX_STATE_NONE = 0,
+ PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
+ PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
+ PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
+ PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
+ PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
+ PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
+ PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
+ PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
+ PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
+ PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
+ PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
+ PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
+ PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
+ PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
+ PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
+ PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
+} pm_lex_state_t;
+
+/*
+ * The type of quote that a heredoc uses.
+ */
+typedef enum {
+ PM_HEREDOC_QUOTE_NONE,
+ PM_HEREDOC_QUOTE_SINGLE = '\'',
+ PM_HEREDOC_QUOTE_DOUBLE = '"',
+ PM_HEREDOC_QUOTE_BACKTICK = '`',
+} pm_heredoc_quote_t;
+
+/*
+ * The type of indentation that a heredoc uses.
+ */
+typedef enum {
+ PM_HEREDOC_INDENT_NONE,
+ PM_HEREDOC_INDENT_DASH,
+ PM_HEREDOC_INDENT_TILDE,
+} pm_heredoc_indent_t;
+
+/*
+ * All of the information that must be stored in order to lex a heredoc.
+ */
+typedef struct {
+ /* A pointer to the start of the heredoc identifier. */
+ const uint8_t *ident_start;
+
+ /* The length of the heredoc identifier. */
+ size_t ident_length;
+
+ /* The type of quote that the heredoc uses. */
+ pm_heredoc_quote_t quote;
+
+ /* The type of indentation that the heredoc uses. */
+ pm_heredoc_indent_t indent;
+} pm_heredoc_lex_mode_t;
+
+/*
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
+ * kind of token it is currently lexing. For example, when we find the start of
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
+ * are found as part of a string.
+ */
+typedef struct pm_lex_mode {
+ /* The type of this lex mode. */
+ enum {
+ /* This state is used when any given token is being lexed. */
+ PM_LEX_DEFAULT,
+
+ /*
+ * This state is used when we're lexing as normal but inside an embedded
+ * expression of a string.
+ */
+ PM_LEX_EMBEXPR,
+
+ /*
+ * This state is used when we're lexing a variable that is embedded
+ * directly inside of a string with the # shorthand.
+ */
+ PM_LEX_EMBVAR,
+
+ /* This state is used when you are inside the content of a heredoc. */
+ PM_LEX_HEREDOC,
+
+ /*
+ * This state is used when we are lexing a list of tokens, as in a %w
+ * word list literal or a %i symbol list literal.
+ */
+ PM_LEX_LIST,
+
+ /*
+ * This state is used when a regular expression has been begun and we
+ * are looking for the terminator.
+ */
+ PM_LEX_REGEXP,
+
+ /*
+ * This state is used when we are lexing a string or a string-like
+ * token, as in string content with either quote or an xstring.
+ */
+ PM_LEX_STRING
+ } mode;
+
+ /* The data associated with this type of lex mode. */
+ union {
+ struct {
+ /* This keeps track of the nesting level of the list. */
+ size_t nesting;
+
+ /* Whether or not interpolation is allowed in this list. */
+ bool interpolation;
+
+ /*
+ * When lexing a list, it takes into account balancing the
+ * terminator if the terminator is one of (), [], {}, or <>.
+ */
+ uint8_t incrementor;
+
+ /* This is the terminator of the list literal. */
+ uint8_t terminator;
+
+ /*
+ * This is the character set that should be used to delimit the
+ * tokens within the list.
+ */
+ uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
+ } list;
+
+ struct {
+ /*
+ * This keeps track of the nesting level of the regular expression.
+ */
+ size_t nesting;
+
+ /*
+ * When lexing a regular expression, it takes into account balancing
+ * the terminator if the terminator is one of (), [], {}, or <>.
+ */
+ uint8_t incrementor;
+
+ /* This is the terminator of the regular expression. */
+ uint8_t terminator;
+
+ /*
+ * This is the character set that should be used to delimit the
+ * tokens within the regular expression.
+ */
+ uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
+ } regexp;
+
+ struct {
+ /* This keeps track of the nesting level of the string. */
+ size_t nesting;
+
+ /* Whether or not interpolation is allowed in this string. */
+ bool interpolation;
+
+ /*
+ * Whether or not at the end of the string we should allow a :,
+ * which would indicate this was a dynamic symbol instead of a
+ * string.
+ */
+ bool label_allowed;
+
+ /*
+ * When lexing a string, it takes into account balancing the
+ * terminator if the terminator is one of (), [], {}, or <>.
+ */
+ uint8_t incrementor;
+
+ /*
+ * This is the terminator of the string. It is typically either a
+ * single or double quote.
+ */
+ uint8_t terminator;
+
+ /*
+ * This is the character set that should be used to delimit the
+ * tokens within the string.
+ */
+ uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE];
+ } string;
+
+ struct {
+ /*
+ * All of the data necessary to lex a heredoc.
+ */
+ pm_heredoc_lex_mode_t base;
+
+ /*
+ * This is the pointer to the character where lexing should resume
+ * once the heredoc has been completely processed.
+ */
+ const uint8_t *next_start;
+
+ /*
+ * This is used to track the amount of common whitespace on each
+ * line so that we know how much to dedent each line in the case of
+ * a tilde heredoc.
+ */
+ size_t *common_whitespace;
+
+ /* True if the previous token ended with a line continuation. */
+ bool line_continuation;
+ } heredoc;
+ } as;
+
+ /* The previous lex state so that it knows how to pop. */
+ struct pm_lex_mode *prev;
+} pm_lex_mode_t;
+
+/*
+ * We pre-allocate a certain number of lex states in order to avoid having to
+ * call malloc too many times while parsing. You really shouldn't need more than
+ * this because you only really nest deeply when doing string interpolation.
+ */
+#define PM_LEX_STACK_SIZE 4
+
+/*
+ * While parsing, we keep track of a stack of contexts. This is helpful for
+ * error recovery so that we can pop back to a previous context when we hit a
+ * token that is understood by a parent context but not by the current context.
+ */
+typedef enum {
+ /* a null context, used for returning a value from a function */
+ PM_CONTEXT_NONE = 0,
+
+ /* a begin statement */
+ PM_CONTEXT_BEGIN,
+
+ /* an ensure statement with an explicit begin */
+ PM_CONTEXT_BEGIN_ENSURE,
+
+ /* a rescue else statement with an explicit begin */
+ PM_CONTEXT_BEGIN_ELSE,
+
+ /* a rescue statement with an explicit begin */
+ PM_CONTEXT_BEGIN_RESCUE,
+
+ /* expressions in block arguments using braces */
+ PM_CONTEXT_BLOCK_BRACES,
+
+ /* expressions in block arguments using do..end */
+ PM_CONTEXT_BLOCK_KEYWORDS,
+
+ /* an ensure statement within a do..end block */
+ PM_CONTEXT_BLOCK_ENSURE,
+
+ /* a rescue else statement within a do..end block */
+ PM_CONTEXT_BLOCK_ELSE,
+
+ /* expressions in block parameters `foo do |...| end` */
+ PM_CONTEXT_BLOCK_PARAMETERS,
+
+ /* a rescue statement within a do..end block */
+ PM_CONTEXT_BLOCK_RESCUE,
+
+ /* a case when statement */
+ PM_CONTEXT_CASE_WHEN,
+
+ /* a case in statement */
+ PM_CONTEXT_CASE_IN,
+
+ /* a class declaration */
+ PM_CONTEXT_CLASS,
+
+ /* an ensure statement within a class statement */
+ PM_CONTEXT_CLASS_ENSURE,
+
+ /* a rescue else statement within a class statement */
+ PM_CONTEXT_CLASS_ELSE,
+
+ /* a rescue statement within a class statement */
+ PM_CONTEXT_CLASS_RESCUE,
+
+ /* a method definition */
+ PM_CONTEXT_DEF,
+
+ /* an ensure statement within a method definition */
+ PM_CONTEXT_DEF_ENSURE,
+
+ /* a rescue else statement within a method definition */
+ PM_CONTEXT_DEF_ELSE,
+
+ /* a rescue statement within a method definition */
+ PM_CONTEXT_DEF_RESCUE,
+
+ /* a method definition's parameters */
+ PM_CONTEXT_DEF_PARAMS,
+
+ /* a defined? expression */
+ PM_CONTEXT_DEFINED,
+
+ /* a method definition's default parameter */
+ PM_CONTEXT_DEFAULT_PARAMS,
+
+ /* an else clause */
+ PM_CONTEXT_ELSE,
+
+ /* an elsif clause */
+ PM_CONTEXT_ELSIF,
+
+ /* an interpolated expression */
+ PM_CONTEXT_EMBEXPR,
+
+ /* a for loop */
+ PM_CONTEXT_FOR,
+
+ /* a for loop's index */
+ PM_CONTEXT_FOR_INDEX,
+
+ /* an if statement */
+ PM_CONTEXT_IF,
+
+ /* a lambda expression with braces */
+ PM_CONTEXT_LAMBDA_BRACES,
+
+ /* a lambda expression with do..end */
+ PM_CONTEXT_LAMBDA_DO_END,
+
+ /* an ensure statement within a lambda expression */
+ PM_CONTEXT_LAMBDA_ENSURE,
+
+ /* a rescue else statement within a lambda expression */
+ PM_CONTEXT_LAMBDA_ELSE,
+
+ /* a rescue statement within a lambda expression */
+ PM_CONTEXT_LAMBDA_RESCUE,
+
+ /* the predicate clause of a loop statement */
+ PM_CONTEXT_LOOP_PREDICATE,
+
+ /* the top level context */
+ PM_CONTEXT_MAIN,
+
+ /* a module declaration */
+ PM_CONTEXT_MODULE,
+
+ /* an ensure statement within a module statement */
+ PM_CONTEXT_MODULE_ENSURE,
+
+ /* a rescue else statement within a module statement */
+ PM_CONTEXT_MODULE_ELSE,
+
+ /* a rescue statement within a module statement */
+ PM_CONTEXT_MODULE_RESCUE,
+
+ /* a multiple target expression */
+ PM_CONTEXT_MULTI_TARGET,
+
+ /* a parenthesized expression */
+ PM_CONTEXT_PARENS,
+
+ /* an END block */
+ PM_CONTEXT_POSTEXE,
+
+ /* a predicate inside an if/elsif/unless statement */
+ PM_CONTEXT_PREDICATE,
+
+ /* a BEGIN block */
+ PM_CONTEXT_PREEXE,
+
+ /* a modifier rescue clause */
+ PM_CONTEXT_RESCUE_MODIFIER,
+
+ /* a singleton class definition */
+ PM_CONTEXT_SCLASS,
+
+ /* an ensure statement with a singleton class */
+ PM_CONTEXT_SCLASS_ENSURE,
+
+ /* a rescue else statement with a singleton class */
+ PM_CONTEXT_SCLASS_ELSE,
+
+ /* a rescue statement with a singleton class */
+ PM_CONTEXT_SCLASS_RESCUE,
+
+ /* a ternary expression */
+ PM_CONTEXT_TERNARY,
+
+ /* an unless statement */
+ PM_CONTEXT_UNLESS,
+
+ /* an until statement */
+ PM_CONTEXT_UNTIL,
+
+ /* a while statement */
+ PM_CONTEXT_WHILE,
+} pm_context_t;
+
+/* This is a node in a linked list of contexts. */
+typedef struct pm_context_node {
+ /* The context that this node represents. */
+ pm_context_t context;
+
+ /* A pointer to the previous context in the linked list. */
+ struct pm_context_node *prev;
+} pm_context_node_t;
+
+/* The type of shareable constant value that can be set. */
+typedef uint8_t pm_shareable_constant_value_t;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
+static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
+
+/*
+ * This tracks an individual local variable in a certain lexical context, as
+ * well as the number of times it is read.
+ */
+typedef struct {
+ /* The name of the local variable. */
+ pm_constant_id_t name;
+
+ /* The location of the local variable in the source. */
+ pm_location_t location;
+
+ /* The index of the local variable in the local table. */
+ uint32_t index;
+
+ /* The number of times the local variable is read. */
+ uint32_t reads;
+
+ /* The hash of the local variable. */
+ uint32_t hash;
+} pm_local_t;
+
+/*
+ * This is a set of local variables in a certain lexical context (method, class,
+ * module, etc.). We need to track how many times these variables are read in
+ * order to warn if they only get written.
+ */
+typedef struct pm_locals {
+ /* The number of local variables in the set. */
+ uint32_t size;
+
+ /* The capacity of the local variables set. */
+ uint32_t capacity;
+
+ /*
+ * A bloom filter over constant IDs stored in this set. Used to quickly
+ * reject lookups for names that are definitely not present, avoiding the
+ * cost of a linear scan or hash probe.
+ */
+ uint32_t bloom;
+
+ /* The nullable allocated memory for the local variables in the set. */
+ pm_local_t *locals;
+} pm_locals_t;
+
+/* The flags about scope parameters that can be set. */
+typedef uint8_t pm_scope_parameters_t;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
+static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
+
+/*
+ * This struct represents a node in a linked list of scopes. Some scopes can see
+ * into their parent scopes, while others cannot.
+ */
+typedef struct pm_scope {
+ /* A pointer to the previous scope in the linked list. */
+ struct pm_scope *previous;
+
+ /* The IDs of the locals in the given scope. */
+ pm_locals_t locals;
+
+ /*
+ * This is a list of the implicit parameters contained within the block.
+ * These will be processed after the block is parsed to determine the kind
+ * of parameters node that should be used and to check if any errors need to
+ * be added.
+ */
+ pm_node_list_t implicit_parameters;
+
+ /*
+ * This is a bitfield that indicates the parameters that are being used in
+ * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
+ * There are three different kinds of parameters that can be used in a
+ * scope:
+ *
+ * - Ordinary parameters (e.g., def foo(bar); end)
+ * - Numbered parameters (e.g., def foo; _1; end)
+ * - The it parameter (e.g., def foo; it; end)
+ *
+ * If ordinary parameters are being used, then certain parameters can be
+ * forwarded to another method/structure. Those are indicated by four
+ * additional bits in the params field. For example, some combinations of:
+ *
+ * - def foo(*); end
+ * - def foo(**); end
+ * - def foo(&); end
+ * - def foo(...); end
+ */
+ pm_scope_parameters_t parameters;
+
+ /*
+ * The current state of constant shareability for this scope. This is
+ * changed by magic shareable_constant_value comments.
+ */
+ pm_shareable_constant_value_t shareable_constant;
+
+ /*
+ * A boolean indicating whether or not this scope can see into its parent.
+ * If closed is true, then the scope cannot see into its parent.
+ */
+ bool closed;
+} pm_scope_t;
+
+/*
+ * A type that represents a stack of boolean values, stored in a uint32_t.
+ */
+typedef uint32_t pm_state_stack_t;
+
+/*
+ * This struct represents the overall parser. It contains a reference to the
+ * source file, as well as pointers that indicate where in the source it's
+ * currently parsing. It also contains the most recent and current token that
+ * it's considering.
+ */
+struct pm_parser_t {
+ /* The arena used for all AST-lifetime allocations. Caller-owned. */
+ pm_arena_t *arena;
+
+ /* The arena used for parser metadata (comments, diagnostics, etc.). */
+ pm_arena_t metadata_arena;
+
+ /*
+ * The next node identifier that will be assigned. This is a unique
+ * identifier used to track nodes such that the syntax tree can be dropped
+ * but the node can be found through another parse.
+ */
+ uint32_t node_id;
+
+ /*
+ * A single-entry cache for pm_parser_constant_id_raw. Avoids redundant
+ * constant pool lookups when the same token is resolved multiple times
+ * (e.g., once during lexing for local variable detection, and again
+ * during parsing for node creation).
+ */
+ struct {
+ const uint8_t *start;
+ const uint8_t *end;
+ pm_constant_id_t id;
+ } constant_cache;
+
+ /* The current state of the lexer. */
+ pm_lex_state_t lex_state;
+
+ /* Tracks the current nesting of (), [], and {}. */
+ int enclosure_nesting;
+
+ /*
+ * Used to temporarily track the nesting of enclosures to determine if a {
+ * is the beginning of a lambda following the parameters of a lambda.
+ */
+ int lambda_enclosure_nesting;
+
+ /*
+ * Used to track the nesting of braces to ensure we get the correct value
+ * when we are interpolating blocks with braces.
+ */
+ int brace_nesting;
+
+ /*
+ * The stack used to determine if a do keyword belongs to the predicate of a
+ * while, until, or for loop.
+ */
+ pm_state_stack_t do_loop_stack;
+
+ /*
+ * The stack used to determine if a do keyword belongs to the beginning of a
+ * block.
+ */
+ pm_state_stack_t accepts_block_stack;
+
+ /* A stack of lex modes. */
+ struct {
+ /* The current mode of the lexer. */
+ pm_lex_mode_t *current;
+
+ /* The stack of lexer modes. */
+ pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
+
+ /* The current index into the lexer mode stack. */
+ size_t index;
+ } lex_modes;
+
+ /* The pointer to the start of the source. */
+ const uint8_t *start;
+
+ /* The pointer to the end of the source. */
+ const uint8_t *end;
+
+ /* The previous token we were considering. */
+ pm_token_t previous;
+
+ /* The current token we're considering. */
+ pm_token_t current;
+
+ /*
+ * This is a special field set on the parser when we need the parser to jump
+ * to a specific location when lexing the next token, as opposed to just
+ * using the end of the previous token. Normally this is NULL.
+ */
+ const uint8_t *next_start;
+
+ /*
+ * This field indicates the end of a heredoc whose identifier was found on
+ * the current line. If another heredoc is found on the same line, then this
+ * will be moved forward to the end of that heredoc. If no heredocs are
+ * found on a line then this is NULL.
+ */
+ const uint8_t *heredoc_end;
+
+ /* The list of comments that have been found while parsing. */
+ pm_list_t comment_list;
+
+ /* The list of magic comments that have been found while parsing. */
+ pm_list_t magic_comment_list;
+
+ /*
+ * An optional location that represents the location of the __END__ marker
+ * and the rest of the content of the file. This content is loaded into the
+ * DATA constant when the file being parsed is the main file being executed.
+ */
+ pm_location_t data_loc;
+
+ /* The list of warnings that have been found while parsing. */
+ pm_list_t warning_list;
+
+ /* The list of errors that have been found while parsing. */
+ pm_list_t error_list;
+
+ /* The current local scope. */
+ pm_scope_t *current_scope;
+
+ /* The current parsing context. */
+ pm_context_node_t *current_context;
+
+ /*
+ * The hash keys for the hash that is currently being parsed. This is not
+ * usually necessary because the keys can be passed down the various call
+ * chains, but in the event that you're parsing a hash that is being directly
+ * pushed into another hash with **, we need to share the hash keys so that
+ * we can warn for the nested hash as well.
+ */
+ pm_static_literals_t *current_hash_keys;
+
+ /*
+ * The encoding functions for the current file are attached to the parser as
+ * it's parsing so that it can change with a magic comment.
+ */
+ const pm_encoding_t *encoding;
+
+ /*
+ * When the encoding that is being used to parse the source is changed by
+ * prism, we provide the ability here to call out to a user-defined
+ * function.
+ */
+ pm_encoding_changed_callback_t encoding_changed_callback;
+
+ /*
+ * This pointer indicates where a comment must start if it is to be
+ * considered an encoding comment.
+ */
+ const uint8_t *encoding_comment_start;
+
+ /*
+ * When you are lexing through a file, the lexer needs all of the information
+ * that the parser additionally provides (for example, the local table). So if
+ * you want to properly lex Ruby, you need to actually lex it in the context of
+ * the parser. In order to provide this functionality, we optionally allow a
+ * struct to be attached to the parser that calls back out to a user-provided
+ * callback when each token is lexed.
+ */
+ struct {
+ /*
+ * This is the callback that is called when a token is lexed. It is
+ * passed the opaque data pointer, the parser, and the token that was
+ * lexed.
+ */
+ pm_lex_callback_t callback;
+
+ /*
+ * This opaque pointer is used to provide whatever information the user
+ * deemed necessary to the callback. In our case we use it to pass the
+ * array that the tokens get appended into.
+ */
+ void *data;
+ } lex_callback;
+
+ /*
+ * This is the path of the file being parsed. We use the filepath when
+ * constructing SourceFileNodes.
+ */
+ pm_string_t filepath;
+
+ /*
+ * This constant pool keeps all of the constants defined throughout the file
+ * so that we can reference them later.
+ */
+ pm_constant_pool_t constant_pool;
+
+ /* This is the list of line offsets in the source file. */
+ pm_line_offset_list_t line_offsets;
+
+ /*
+ * State communicated from the lexer to the parser for integer tokens.
+ */
+ struct {
+ /*
+ * A flag indicating the base of the integer (binary, octal, decimal,
+ * hexadecimal). Set during lexing and read during node creation.
+ */
+ pm_node_flags_t base;
+
+ /*
+ * When lexing a decimal integer that fits in a uint32_t, we compute
+ * the value during lexing to avoid re-scanning the digits during
+ * parsing. If lexed is true, this holds the result and
+ * pm_integer_parse can be skipped.
+ */
+ uint32_t value;
+
+ /* Whether value holds a valid pre-computed integer. */
+ bool lexed;
+ } integer;
+
+ /*
+ * This string is used to pass information from the lexer to the parser. It
+ * is particularly necessary because of escape sequences.
+ */
+ pm_string_t current_string;
+
+ /*
+ * The line number at the start of the parse. This will be used to offset
+ * the line numbers of all of the locations.
+ */
+ int32_t start_line;
+
+ /*
+ * When a string-like expression is being lexed, any byte or escape sequence
+ * that resolves to a value whose top bit is set (i.e., >= 0x80) will
+ * explicitly set the encoding to the same encoding as the source.
+ * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
+ * resolves to a value whose top bit is set, then the encoding will be
+ * explicitly set to UTF-8.
+ *
+ * The _next_ time this happens, if the encoding that is about to become the
+ * explicitly set encoding does not match the previously set explicit
+ * encoding, a mixed encoding error will be emitted.
+ *
+ * When the expression is finished being lexed, the explicit encoding
+ * controls the encoding of the expression. For the most part this means
+ * that the expression will either be encoded in the source encoding or
+ * UTF-8. This holds for all encodings except US-ASCII. If the source is
+ * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
+ * expression will be encoded as ASCII-8BIT.
+ *
+ * Note that if the expression is a list, different elements within the same
+ * list can have different encodings, so this will get reset between each
+ * element. Furthermore all of this only applies to lists that support
+ * interpolation, because otherwise escapes that could change the encoding
+ * are ignored.
+ *
+ * At first glance, it may make more sense for this to live on the lexer
+ * mode, but we need it here to communicate back to the parser for character
+ * literals that do not push a new lexer mode.
+ */
+ const pm_encoding_t *explicit_encoding;
+
+ /*
+ * When parsing block exits (e.g., break, next, redo), we need to validate
+ * that they are in correct contexts. For the most part we can do this by
+ * looking at our parent contexts. However, modifier while and until
+ * expressions can change that context to make block exits valid. In these
+ * cases, we need to keep track of the block exits and then validate them
+ * after the expression has been parsed.
+ *
+ * We use a pointer here because we don't want to keep a whole list attached
+ * since this will only be used in the context of begin/end expressions.
+ */
+ pm_node_list_t *current_block_exits;
+
+ /* The version of prism that we should use to parse. */
+ pm_options_version_t version;
+
+ /* The command line flags given from the options. */
+ uint8_t command_line;
+
+ /*
+ * Whether or not we have found a frozen_string_literal magic comment with
+ * a true or false value.
+ * May be:
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
+ */
+ int8_t frozen_string_literal;
+
+ /*
+ * Whether or not we are parsing an eval string. This impacts whether or not
+ * we should evaluate if block exits/yields are valid.
+ */
+ bool parsing_eval;
+
+ /*
+ * Whether or not we are parsing a "partial" script, which is a script that
+ * will be evaluated in the context of another script, so we should not
+ * check jumps (next/break/etc.) for validity.
+ */
+ bool partial_script;
+
+ /* Whether or not we're at the beginning of a command. */
+ bool command_start;
+
+ /*
+ * Whether or not we're currently parsing the body of an endless method
+ * definition. In this context, PM_TOKEN_KEYWORD_DO_BLOCK should not be
+ * consumed by commands (it should bubble up to the outer context).
+ */
+ bool in_endless_def_body;
+
+ /* Whether or not we're currently recovering from a syntax error. */
+ bool recovering;
+
+ /*
+ * Whether or not the source being parsed could become valid if more input
+ * were appended. This is set to false when the parser encounters a token
+ * that is definitively wrong (e.g., a stray `end` or `]`) as opposed to
+ * merely incomplete.
+ */
+ bool continuable;
+
+ /*
+ * This is very specialized behavior for when you want to parse in a context
+ * that does not respect encoding comments. Its main use case is translating
+ * into the whitequark/parser AST which re-encodes source files in UTF-8
+ * before they are parsed and ignores encoding comments.
+ */
+ bool encoding_locked;
+
+ /*
+ * Whether or not the encoding has been changed by a magic comment. We use
+ * this to provide a fast path for the lexer instead of going through the
+ * function pointer.
+ */
+ bool encoding_changed;
+
+ /*
+ * This flag indicates that we are currently parsing a pattern matching
+ * expression and impacts the calculation of newlines.
+ */
+ bool pattern_matching_newlines;
+
+ /* This flag indicates that we are currently parsing a keyword argument. */
+ bool in_keyword_arg;
+
+ /*
+ * Whether or not the parser has seen a token that has semantic meaning
+ * (i.e., a token that is not a comment or whitespace).
+ */
+ bool semantic_token_seen;
+
+ /*
+ * By default, Ruby always warns about mismatched indentation. This can be
+ * toggled with a magic comment.
+ */
+ bool warn_mismatched_indentation;
+
+#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
+ /*
+ * Cached lookup tables for pm_strpbrk's SIMD fast path. Avoids rebuilding
+ * the nibble-based tables on every call when the charset hasn't changed
+ * (which is the common case during string/regex/list lexing).
+ */
+ struct {
+ /* The cached charset (null-terminated, max 11 chars + NUL). */
+ uint8_t charset[12];
+
+ /* Nibble-based low lookup table for SIMD matching. */
+ uint8_t low_lut[16];
+
+ /* Nibble-based high lookup table for SIMD matching. */
+ uint8_t high_lut[16];
+
+ /* Scalar fallback table (4 x 64-bit bitmasks covering all ASCII). */
+ uint64_t table[4];
+ } strpbrk_cache;
+#endif
+};
+
+/*
+ * Initialize a parser with the given arena, source buffer, size, and options.
+ */
+void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
+
+/*
+ * Free the memory held by the given parser.
+ *
+ * This does not free the `pm_options_t` object that was used to initialize the
+ * parser.
+ */
+void pm_parser_cleanup(pm_parser_t *parser);
+
+#endif
diff --git a/prism/internal/regexp.h b/prism/internal/regexp.h
new file mode 100644
index 0000000000..3710c984fc
--- /dev/null
+++ b/prism/internal/regexp.h
@@ -0,0 +1,41 @@
+#ifndef PRISM_INTERNAL_REGEXP_H
+#define PRISM_INTERNAL_REGEXP_H
+
+#include "prism/ast.h"
+#include "prism/parser.h"
+
+/*
+ * Accumulation state for named capture groups found during regexp parsing.
+ * The caller initializes this with the call node and passes it to
+ * pm_regexp_parse. The regexp parser populates match and names as groups
+ * are found.
+ */
+typedef struct {
+ /* The call node wrapping the regular expression node (for =~). */
+ pm_call_node_t *call;
+
+ /* The match write node being built, or NULL if no captures found yet. */
+ pm_match_write_node_t *match;
+
+ /* The list of capture names found so far (for deduplication). */
+ pm_constant_id_list_t names;
+} pm_regexp_name_data_t;
+
+/*
+ * Callback invoked by pm_regexp_parse() for each named capture group found.
+ */
+typedef void (*pm_regexp_name_callback_t)(pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data);
+
+/*
+ * Parse a regular expression, validate its encoding, and optionally extract
+ * named capture groups. Returns the encoding flags to set on the node.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_flags_t pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data);
+
+/*
+ * Parse an interpolated regular expression for named capture groups only.
+ * No encoding validation is performed.
+ */
+void pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data);
+
+#endif
diff --git a/prism/internal/serialize.h b/prism/internal/serialize.h
new file mode 100644
index 0000000000..e611a0374b
--- /dev/null
+++ b/prism/internal/serialize.h
@@ -0,0 +1,34 @@
+#ifndef PRISM_INTERNAL_SERIALIZE_H
+#define PRISM_INTERNAL_SERIALIZE_H
+
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+
+#include "prism/ast.h"
+#include "prism/buffer.h"
+#include "prism/excludes.h"
+#include "prism/parser.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+/*
+ * Serialize the given list of comments to the given buffer.
+ */
+void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer);
+
+/*
+ * Serialize the name of the encoding to the buffer.
+ */
+void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer);
+
+/*
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ */
+void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
+
+#endif
+
+#endif
diff --git a/prism/internal/source.h b/prism/internal/source.h
new file mode 100644
index 0000000000..b3c2b55be3
--- /dev/null
+++ b/prism/internal/source.h
@@ -0,0 +1,72 @@
+#ifndef PRISM_INTERNAL_SOURCE_H
+#define PRISM_INTERNAL_SOURCE_H
+
+#include "prism/source.h"
+#include "prism/buffer.h"
+
+#include <stdbool.h>
+
+/*
+ * The type of source, which determines cleanup behavior.
+ *
+ * A value of this enum is stored in the `type` field of struct pm_source_t.
+ * Only PM_SOURCE_STREAM sources use that struct's nested `stream` member.
+ */
+typedef enum {
+ /* Wraps existing constant memory, no cleanup. */
+ PM_SOURCE_CONSTANT,
+
+ /* Wraps existing shared memory (non-owning slice), no cleanup. */
+ PM_SOURCE_SHARED,
+
+ /* Owns a heap-allocated buffer, freed on cleanup. */
+ PM_SOURCE_OWNED,
+
+ /* Memory-mapped file, unmapped on cleanup. */
+ PM_SOURCE_MAPPED,
+
+ /* Stream source backed by a pm_buffer_t.
+ * NOTE(review): unlike the other values, the cleanup behavior is not stated
+ * here; confirm whether the backing buffer is released on cleanup. */
+ PM_SOURCE_STREAM
+} pm_source_type_t;
+
+/*
+ * The internal representation of a source.
+ *
+ * Every source carries a start pointer, a byte length, and a type tag. The
+ * nested `stream` member holds the additional state that is only meaningful
+ * when `type` is PM_SOURCE_STREAM.
+ */
+struct pm_source_t {
+ /* A pointer to the start of the source data. */
+ const uint8_t *source;
+
+ /* The length of the source data in bytes. */
+ size_t length;
+
+ /* The type of the source (one of the pm_source_type_t values). */
+ pm_source_type_t type;
+
+ /* Stream-specific data, only used for PM_SOURCE_STREAM sources. */
+ struct {
+ /* The buffer that holds the accumulated stream data. */
+ pm_buffer_t *buffer;
+
+ /* The stream object to read from. It is stored as an untyped pointer.
+ * NOTE(review): presumably passed to the fgets/feof callbacks below;
+ * confirm against the implementation. */
+ void *stream;
+
+ /* The function to use to read from the stream. */
+ pm_source_stream_fgets_t *fgets;
+
+ /* The function to use to check if the stream is at EOF. */
+ pm_source_stream_feof_t *feof;
+
+ /* Whether the stream has reached EOF.
+ * NOTE(review): presumably the value reported by pm_source_stream_eof;
+ * confirm against the implementation. */
+ bool eof;
+ } stream;
+};
+
+/*
+ * Read from a stream into the source's internal buffer. This is used by
+ * pm_parse_stream to incrementally read the source.
+ *
+ * NOTE(review): the meaning of the boolean return value is not documented
+ * here; presumably it reports whether more data was read. Confirm against the
+ * implementation before relying on it.
+ */
+bool pm_source_stream_read(pm_source_t *source);
+
+/*
+ * Returns whether the stream source has reached EOF.
+ */
+bool pm_source_stream_eof(const pm_source_t *source);
+
+#endif
diff --git a/prism/static_literals.h b/prism/internal/static_literals.h
index bd29761899..d59002ac0a 100644
--- a/prism/static_literals.h
+++ b/prism/internal/static_literals.h
@@ -1,33 +1,25 @@
-/**
- * @file static_literals.h
- *
- * A set of static literal nodes that can be checked for duplicates.
- */
-#ifndef PRISM_STATIC_LITERALS_H
-#define PRISM_STATIC_LITERALS_H
+#ifndef PRISM_INTERNAL_STATIC_LITERALS_H
+#define PRISM_INTERNAL_STATIC_LITERALS_H
-#include "prism/defines.h"
#include "prism/ast.h"
-#include "prism/util/pm_newline_list.h"
-
-#include <assert.h>
-#include <stdbool.h>
+#include "prism/buffer.h"
+#include "prism/line_offset_list.h"
-/**
+/*
* An internal hash table for a set of nodes.
*/
typedef struct {
- /** The array of nodes in the hash table. */
+ /* The array of nodes in the hash table. */
pm_node_t **nodes;
- /** The size of the hash table. */
+ /* The size of the hash table. */
uint32_t size;
- /** The space that has been allocated in the hash table. */
+ /* The space that has been allocated in the hash table. */
uint32_t capacity;
} pm_node_hash_t;
-/**
+/*
* Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
* to alert the user of potential issues. To do this, we keep a set of the nodes
* that have been seen so far, and compare whenever we find a new node.
@@ -36,86 +28,71 @@ typedef struct {
* that need to be performed.
*/
typedef struct {
- /**
+ /*
* This is the set of IntegerNode and SourceLineNode instances.
*/
pm_node_hash_t integer_nodes;
- /**
+ /*
* This is the set of FloatNode instances.
*/
pm_node_hash_t float_nodes;
- /**
+ /*
* This is the set of RationalNode and ImaginaryNode instances.
*/
pm_node_hash_t number_nodes;
- /**
+ /*
* This is the set of StringNode and SourceFileNode instances.
*/
pm_node_hash_t string_nodes;
- /**
+ /*
* This is the set of RegularExpressionNode instances.
*/
pm_node_hash_t regexp_nodes;
- /**
+ /*
* This is the set of SymbolNode instances.
*/
pm_node_hash_t symbol_nodes;
- /**
+ /*
* A pointer to the last TrueNode instance that was inserted, or NULL.
*/
pm_node_t *true_node;
- /**
+ /*
* A pointer to the last FalseNode instance that was inserted, or NULL.
*/
pm_node_t *false_node;
- /**
+ /*
* A pointer to the last NilNode instance that was inserted, or NULL.
*/
pm_node_t *nil_node;
- /**
+ /*
* A pointer to the last SourceEncodingNode instance that was inserted, or
* NULL.
*/
pm_node_t *source_encoding_node;
} pm_static_literals_t;
-/**
+/*
* Add a node to the set of static literals.
- *
- * @param newline_list The list of newline offsets to use to calculate lines.
- * @param start_line The line number that the parser starts on.
- * @param literals The set of static literals to add the node to.
- * @param node The node to add to the set.
- * @param replace Whether to replace the previous node if one already exists.
- * @return A pointer to the node that is being overwritten, if there is one.
*/
-pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
+pm_node_t * pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
-/**
+/*
* Free the internal memory associated with the given static literals set.
- *
- * @param literals The set of static literals to free.
*/
void pm_static_literals_free(pm_static_literals_t *literals);
-/**
+/*
* Create a string-based representation of the given static literal.
- *
- * @param buffer The buffer to write the string to.
- * @param newline_list The list of newline offsets to use to calculate lines.
- * @param start_line The line number that the parser starts on.
- * @param encoding_name The name of the encoding of the source being parsed.
- * @param node The node to create a string representation of.
*/
-void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
+void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node);
#endif
diff --git a/prism/internal/stringy.h b/prism/internal/stringy.h
new file mode 100644
index 0000000000..1aaa23ea75
--- /dev/null
+++ b/prism/internal/stringy.h
@@ -0,0 +1,30 @@
+#ifndef PRISM_INTERNAL_STRINGY_H
+#define PRISM_INTERNAL_STRINGY_H
+
+#include "prism/stringy.h"
+
+/*
+ * Defines an empty string. This is useful for initializing a string that will
+ * be filled in later.
+ *
+ * Expands to a pm_string_t compound literal whose type is PM_STRING_CONSTANT,
+ * whose source is NULL, and whose length is 0.
+ */
+#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
+
+/*
+ * Initialize a shared string that is based on initial input.
+ */
+void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
+
+/*
+ * Compare the underlying lengths and bytes of two strings. Returns 0 if the
+ * strings are equal, a negative number if the left string is less than the
+ * right string, and a positive number if the left string is greater than the
+ * right string.
+ */
+int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
+
+/*
+ * Free the associated memory of the given string.
+ */
+void pm_string_cleanup(pm_string_t *string);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.h b/prism/internal/strncasecmp.h
index 5cb88cb5eb..775f6a993e 100644
--- a/prism/util/pm_strncasecmp.h
+++ b/prism/internal/strncasecmp.h
@@ -1,18 +1,10 @@
-/**
- * @file pm_strncasecmp.h
- *
- * A custom strncasecmp implementation.
- */
-#ifndef PRISM_STRNCASECMP_H
-#define PRISM_STRNCASECMP_H
+#ifndef PRISM_INTERNAL_STRNCASECMP_H
+#define PRISM_INTERNAL_STRNCASECMP_H
-#include "prism/defines.h"
-
-#include <ctype.h>
#include <stddef.h>
#include <stdint.h>
-/**
+/*
* Compare two strings, ignoring case, up to the given length. Returns 0 if the
* strings are equal, a negative number if string1 is less than string2, or a
* positive number if string1 is greater than string2.
@@ -20,12 +12,6 @@
* Note that this is effectively our own implementation of strncasecmp, but it's
* not available on all of the platforms we want to support so we're rolling it
* here.
- *
- * @param string1 The first string to compare.
- * @param string2 The second string to compare
- * @param length The maximum number of characters to compare.
- * @return 0 if the strings are equal, a negative number if string1 is less than
- * string2, or a positive number if string1 is greater than string2.
*/
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length);
diff --git a/prism/util/pm_strpbrk.h b/prism/internal/strpbrk.h
index f387bd5782..d64156c002 100644
--- a/prism/util/pm_strpbrk.h
+++ b/prism/internal/strpbrk.h
@@ -1,19 +1,15 @@
-/**
- * @file pm_strpbrk.h
- *
- * A custom strpbrk implementation.
- */
-#ifndef PRISM_STRPBRK_H
-#define PRISM_STRPBRK_H
+#ifndef PRISM_INTERNAL_STRPBRK_H
+#define PRISM_INTERNAL_STRPBRK_H
-#include "prism/defines.h"
-#include "prism/diagnostic.h"
#include "prism/parser.h"
+/* The maximum number of bytes in a strpbrk charset. */
+#define PM_STRPBRK_CACHE_SIZE 16
+
#include <stddef.h>
-#include <string.h>
+#include <stdint.h>
-/**
+/*
* Here we have rolled our own version of strpbrk. The standard library strpbrk
* has undefined behavior when the source string is not null-terminated. We want
* to support strings that are not null-terminated because pm_parse does not
@@ -31,15 +27,6 @@
* characters that are trailing bytes of multi-byte characters. For example, in
* Shift-JIS, the backslash character can be a trailing byte. In that case we
* need to take a slower path and iterate one multi-byte character at a time.
- *
- * @param parser The parser.
- * @param source The source to search.
- * @param charset The charset to search for.
- * @param length The maximum number of bytes to search.
- * @param validate Whether to validate that the source string is valid in the
- * current encoding of the parser.
- * @return A pointer to the first character in the source string that is in the
- * charset, or NULL if no such character exists.
*/
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
diff --git a/prism/internal/tokens.h b/prism/internal/tokens.h
new file mode 100644
index 0000000000..3a983e54ae
--- /dev/null
+++ b/prism/internal/tokens.h
@@ -0,0 +1,11 @@
+#ifndef PRISM_INTERNAL_TOKENS_H
+#define PRISM_INTERNAL_TOKENS_H
+
+#include "prism/ast.h"
+
+/*
+ * Returns the human-readable name of the given token type.
+ */
+const char * pm_token_str(pm_token_type_t token_type);
+
+#endif
diff --git a/prism/json.h b/prism/json.h
new file mode 100644
index 0000000000..11039e7796
--- /dev/null
+++ b/prism/json.h
@@ -0,0 +1,32 @@
+/**
+ * @file json.h
+ */
+#ifndef PRISM_JSON_H
+#define PRISM_JSON_H
+
+#include "prism/excludes.h"
+
+/* We optionally support dumping to JSON. For systems that don't want or need
+ * this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
+ */
+#ifndef PRISM_EXCLUDE_JSON
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+#include "prism/buffer.h"
+#include "prism/parser.h"
+
+/**
+ * Dump JSON to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param parser The parser that parsed the node.
+ * @param node The node to serialize.
+ */
+PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) PRISM_NONNULL(1, 2, 3);
+
+#endif
+
+#endif
diff --git a/prism/line_offset_list.c b/prism/line_offset_list.c
new file mode 100644
index 0000000000..ce217ebd3f
--- /dev/null
+++ b/prism/line_offset_list.c
@@ -0,0 +1,100 @@
+#include "prism/compiler/align.h"
+#include "prism/internal/line_offset_list.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
+#include <string.h>
+
+/**
+ * Initialize a new line offset list with room for at least the given number
+ * of offsets. The backing array is allocated from the given arena.
+ *
+ * The list always starts with one entry (offset 0 for the first line), so a
+ * requested capacity of 0 is raised to 1. Without that, the store to
+ * offsets[0] below would write past a zero-length allocation.
+ */
+void
+pm_line_offset_list_init(pm_arena_t *arena, pm_line_offset_list_t *list, size_t capacity) {
+    if (capacity == 0) capacity = 1;
+
+    list->offsets = (uint32_t *) pm_arena_alloc(arena, capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
+
+    // The first line always has offset 0.
+    list->offsets[0] = 0;
+    list->size = 1;
+    list->capacity = capacity;
+}
+
+/**
+ * Clear out the newlines that have been appended to the list.
+ *
+ * The size is reset to 1 rather than 0, so the first entry of the list is
+ * kept. The capacity and the backing array are left untouched.
+ */
+void
+pm_line_offset_list_clear(pm_line_offset_list_t *list) {
+ list->size = 1;
+}
+
+/**
+ * Append a new offset to the line offset list (slow path: resize and store).
+ *
+ * A larger array is allocated from the arena, the existing offsets are copied
+ * into it, and then the new offset is stored. The previous array is not
+ * released by this function.
+ *
+ * Growth is by a factor of 1.5. Integer division makes (1 * 3) / 2 == 1 and
+ * (0 * 3) / 2 == 0, so for very small capacities the computed capacity would
+ * not grow at all. In that case it is bumped so that there is always room for
+ * the offset being appended.
+ */
+void
+pm_line_offset_list_append_slow(pm_arena_t *arena, pm_line_offset_list_t *list, uint32_t cursor) {
+    size_t new_capacity = (list->capacity * 3) / 2;
+    if (new_capacity <= list->size) new_capacity = list->size + 1;
+
+    uint32_t *new_offsets = (uint32_t *) pm_arena_alloc(arena, new_capacity * sizeof(uint32_t), PRISM_ALIGNOF(uint32_t));
+    memcpy(new_offsets, list->offsets, list->size * sizeof(uint32_t));
+
+    list->offsets = new_offsets;
+    list->capacity = new_capacity;
+
+    // Offsets are expected to be appended in strictly ascending order.
+    assert(list->size == 0 || cursor > list->offsets[list->size - 1]);
+    list->offsets[list->size++] = cursor;
+}
+
+/**
+ * Look up the line that contains the given offset. An offset that is not
+ * itself in the list maps to the line of the closest smaller offset. The
+ * result is counted from start_line.
+ */
+int32_t
+pm_line_offset_list_line(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) {
+    size_t low = 0;
+    size_t high = list->size - 1;
+
+    // Binary search over the closed index range [low, high].
+    while (low <= high) {
+        size_t probe = low + (high - low) / 2;
+        uint32_t offset = list->offsets[probe];
+
+        if (offset < cursor) {
+            low = probe + 1;
+        } else if (offset > cursor) {
+            high = probe - 1;
+        } else {
+            // Exact hit: the cursor is the recorded offset of this line.
+            return ((int32_t) probe) + start_line;
+        }
+    }
+
+    // No exact hit: low is the index just after the closest smaller offset.
+    return ((int32_t) low) + start_line - 1;
+}
+
+/**
+ * Look up the line and column of the given offset. An offset that is not
+ * itself in the list maps to the line of the closest smaller offset, with the
+ * column being the byte distance from that offset. The line is counted from
+ * start_line.
+ */
+pm_line_column_t
+pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) {
+    size_t low = 0;
+    size_t high = list->size - 1;
+
+    // Binary search over the closed index range [low, high].
+    while (low <= high) {
+        size_t probe = low + (high - low) / 2;
+        uint32_t offset = list->offsets[probe];
+
+        if (offset < cursor) {
+            low = probe + 1;
+        } else if (offset > cursor) {
+            high = probe - 1;
+        } else {
+            // Exact hit: the cursor is a recorded offset, so the column is 0.
+            return ((pm_line_column_t) {
+                .line = ((int32_t) probe) + start_line,
+                .column = 0
+            });
+        }
+    }
+
+    // No exact hit: the entry just before low is the closest smaller offset.
+    return ((pm_line_column_t) {
+        .line = ((int32_t) low) + start_line - 1,
+        .column = cursor - list->offsets[low - 1]
+    });
+}
diff --git a/prism/line_offset_list.h b/prism/line_offset_list.h
new file mode 100644
index 0000000000..848bc49139
--- /dev/null
+++ b/prism/line_offset_list.h
@@ -0,0 +1,61 @@
+/**
+ * @file line_offset_list.h
+ *
+ * A list of byte offsets of the start of each line in a string.
+ *
+ * When compiling the syntax tree, it's necessary to know the line and column
+ * of many nodes. This is necessary to support things like error messages,
+ * tracepoints, etc.
+ *
+ * It's possible that we could store the start line, start column, end line, and
+ * end column on every node in addition to the offsets that we already store,
+ * but that would be quite a lot of memory overhead.
+ */
+#ifndef PRISM_LINE_OFFSET_LIST_H
+#define PRISM_LINE_OFFSET_LIST_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A list of offsets of the start of lines in a string. The offsets are assumed
+ * to be sorted/inserted in ascending order.
+ *
+ * pm_line_offset_list_line_column binary-searches this array, which is why the
+ * ordering matters. Lists set up by pm_line_offset_list_init begin with a
+ * single entry, offset 0, for the first line.
+ */
+typedef struct {
+ /** The number of offsets in the list. */
+ size_t size;
+
+ /** The capacity of the list that has been allocated. */
+ size_t capacity;
+
+ /** The list of offsets. */
+ uint32_t *offsets;
+} pm_line_offset_list_t;
+
+/**
+ * A line and column in a string.
+ *
+ * This is the return type of pm_line_offset_list_line_column.
+ */
+typedef struct {
+ /**
+ * The line number. It is counted from the start_line argument passed to
+ * pm_line_offset_list_line_column, which is a signed 32-bit value.
+ */
+ int32_t line;
+
+ /**
+ * The column in bytes, measured from the recorded start offset of the
+ * line.
+ */
+ uint32_t column;
+} pm_line_column_t;
+
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ *
+ * @param list The list to search.
+ * @param cursor The offset to search for.
+ * @param start_line The line to start counting from.
+ * @returns The line and column of the given offset.
+ */
+PRISM_EXPORTED_FUNCTION pm_line_column_t pm_line_offset_list_line_column(const pm_line_offset_list_t *list, uint32_t cursor, int32_t start_line) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/list.c b/prism/list.c
new file mode 100644
index 0000000000..8d4cd1be94
--- /dev/null
+++ b/prism/list.c
@@ -0,0 +1,24 @@
+#include "prism/internal/list.h"
+
+/**
+ * Returns the size of the list.
+ *
+ * This reads the size field stored on the list; the nodes themselves are not
+ * traversed.
+ */
+size_t
+pm_list_size(pm_list_t *list) {
+ return list->size;
+}
+
+/**
+ * Add a node to the end of the given list and bump the list's size.
+ */
+void
+pm_list_append(pm_list_t *list, pm_list_node_t *node) {
+    // Pick the link that has to point at the new node: the head of an empty
+    // list, otherwise the next pointer of the current tail.
+    pm_list_node_t **link = (list->head == NULL) ? &list->head : &list->tail->next;
+    *link = node;
+
+    list->tail = node;
+    list->size++;
+}
diff --git a/prism/magic_comments.h b/prism/magic_comments.h
new file mode 100644
index 0000000000..c9d6b600e8
--- /dev/null
+++ b/prism/magic_comments.h
@@ -0,0 +1,35 @@
+/**
+ * @file magic_comments.h
+ *
+ * Types and functions related to magic comments found during parsing.
+ */
+#ifndef PRISM_MAGIC_COMMENTS_H
+#define PRISM_MAGIC_COMMENTS_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
+
+#include <stddef.h>
+
+/** An opaque pointer to a magic comment found while parsing. */
+typedef struct pm_magic_comment_t pm_magic_comment_t;
+
+/**
+ * Returns the location of the key associated with the given magic comment.
+ *
+ * @param magic_comment the magic comment whose key location we want to get
+ * @returns the location of the key associated with the given magic comment
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_key(const pm_magic_comment_t *magic_comment) PRISM_NONNULL(1);
+
+/**
+ * Returns the location of the value associated with the given magic comment.
+ *
+ * @param magic_comment the magic comment whose value location we want to get
+ * @returns the location of the value associated with the given magic comment
+ */
+PRISM_EXPORTED_FUNCTION pm_location_t pm_magic_comment_value(const pm_magic_comment_t *magic_comment) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_memchr.c b/prism/memchr.c
index 7ea20ace6d..900e6245b7 100644
--- a/prism/util/pm_memchr.c
+++ b/prism/memchr.c
@@ -1,15 +1,19 @@
-#include "prism/util/pm_memchr.h"
+#include "prism/internal/memchr.h"
-#define PRISM_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#define TRAILING_BYTE_MINIMUM 0x40
/**
* We need to roll our own memchr to handle cases where the encoding changes and
* we need to search for a character in a buffer that could be the trailing byte
* of a multibyte character.
*/
-void *
+const void *
pm_memchr(const void *memory, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding) {
- if (encoding_changed && encoding->multibyte && character >= PRISM_MEMCHR_TRAILING_BYTE_MINIMUM) {
+ if (encoding_changed && encoding->multibyte && character >= TRAILING_BYTE_MINIMUM) {
const uint8_t *source = (const uint8_t *) memory;
size_t index = 0;
@@ -31,5 +35,3 @@ pm_memchr(const void *memory, int character, size_t number, bool encoding_change
return memchr(memory, character, number);
}
}
-
-#undef PRISM_MEMCHR_TRAILING_BYTE_MINIMUM
diff --git a/prism/node.h b/prism/node.h
index e8686a327c..75bc3c9b2d 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -6,9 +6,10 @@
#ifndef PRISM_NODE_H
#define PRISM_NODE_H
-#include "prism/defines.h"
-#include "prism/parser.h"
-#include "prism/util/pm_buffer.h"
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/ast.h"
/**
* Loop through each node in the node list, writing each node to the given
@@ -18,51 +19,12 @@
for (size_t index = 0; index < (list)->size && ((node) = (list)->nodes[index]); index++)
/**
- * Append a new node onto the end of the node list.
- *
- * @param list The list to append to.
- * @param node The node to append.
- */
-void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
-
-/**
- * Prepend a new node onto the beginning of the node list.
- *
- * @param list The list to prepend to.
- * @param node The node to prepend.
- */
-void pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node);
-
-/**
- * Concatenate the given node list onto the end of the other node list.
- *
- * @param list The list to concatenate onto.
- * @param other The list to concatenate.
- */
-void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other);
-
-/**
- * Free the internal memory associated with the given node list.
- *
- * @param list The list to free.
- */
-void pm_node_list_free(pm_node_list_t *list);
-
-/**
- * Deallocate a node and all of its children.
- *
- * @param parser The parser that owns the node.
- * @param node The node to deallocate.
- */
-PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node);
-
-/**
* Returns a string representation of the given node type.
*
* @param node_type The node type to convert to a string.
- * @return A string representation of the given node type.
+ * @returns A string representation of the given node type.
*/
-PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_type);
+PRISM_EXPORTED_FUNCTION const char * pm_node_type(pm_node_type_t node_type);
/**
* Visit each of the nodes in this subtree using the given visitor callback. The
@@ -80,7 +42,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
* bool visit(const pm_node_t *node, void *data) {
* size_t *indent = (size_t *) data;
* for (size_t i = 0; i < *indent * 2; i++) putc(' ', stdout);
- * printf("%s\n", pm_node_type_to_str(node->type));
+ * printf("%s\n", pm_node_type(node->type));
*
* size_t next_indent = *indent + 1;
* size_t *next_data = &next_indent;
@@ -93,18 +55,21 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
* const char *source = "1 + 2; 3 + 4";
* size_t size = strlen(source);
*
- * pm_parser_t parser;
- * pm_options_t options = { 0 };
- * pm_parser_init(&parser, (const uint8_t *) source, size, &options);
+ * pm_arena_t *arena = pm_arena_new();
+ * pm_options_t *options = pm_options_new();
+ *
+ * pm_parser_t *parser = pm_parser_new(arena, (const uint8_t *) source, size, options);
*
* size_t indent = 0;
- * pm_node_t *node = pm_parse(&parser);
+ * pm_node_t *node = pm_parse(parser);
*
* size_t *data = &indent;
* pm_visit_node(node, visit, data);
*
- * pm_node_destroy(&parser, node);
- * pm_parser_free(&parser);
+ * pm_parser_free(parser);
+ * pm_options_free(options);
+ * pm_arena_free(arena);
+ *
* return EXIT_SUCCESS;
* }
* ```
@@ -113,7 +78,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_node_type_to_str(pm_node_type_t node_typ
* @param visitor The callback to call for each node in the subtree.
* @param data An opaque pointer that is passed to the visitor callback.
*/
-PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) PRISM_NONNULL(1);
/**
* Visit the children of the given node with the given callback. This is the
@@ -124,6 +89,6 @@ PRISM_EXPORTED_FUNCTION void pm_visit_node(const pm_node_t *node, bool (*visitor
* @param visitor The callback to call for each child node.
* @param data An opaque pointer that is passed to the visitor callback.
*/
-PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+PRISM_EXPORTED_FUNCTION void pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) PRISM_NONNULL(1);
#endif
diff --git a/prism/options.c b/prism/options.c
index a457178ce8..b589865a2a 100644
--- a/prism/options.c
+++ b/prism/options.c
@@ -1,18 +1,78 @@
-#include "prism/options.h"
+#include "prism/internal/options.h"
+
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/char.h"
+#include "prism/internal/stringy.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Create a new options struct using xcalloc. Allocation failure is treated as
+ * fatal: the process is aborted instead of returning NULL to the caller.
+ */
+pm_options_t *
+pm_options_new(void) {
+    pm_options_t *result = xcalloc(1, sizeof(pm_options_t));
+
+    if (result == NULL) {
+        abort();
+    }
+
+    return result;
+}
+
+/**
+ * Release the memory held by the options: the filepath and encoding strings,
+ * every local of every scope, each scope's locals array, and finally the
+ * scopes array. The options struct itself is not freed here.
+ */
+void
+pm_options_cleanup(pm_options_t *options) {
+    pm_string_cleanup(&options->filepath);
+    pm_string_cleanup(&options->encoding);
+
+    for (size_t i = 0; i < options->scopes_count; i++) {
+        pm_options_scope_t *scope = options->scopes + i;
+
+        // Clean up each local before releasing the array that holds them.
+        size_t j = 0;
+        while (j < scope->locals_count) {
+            pm_string_cleanup(scope->locals + j);
+            j++;
+        }
+
+        xfree_sized(scope->locals, scope->locals_count * sizeof(pm_string_t));
+    }
+
+    xfree_sized(options->scopes, options->scopes_count * sizeof(pm_options_scope_t));
+}
+
+/**
+ * Free both the held memory of the given options struct and the struct itself.
+ *
+ * The held memory is released first through pm_options_cleanup, then the
+ * struct is released with xfree_sized. The pointer must not be used after this
+ * call.
+ *
+ * @param options The options struct to free.
+ */
+void
+pm_options_free(pm_options_t *options) {
+ pm_options_cleanup(options);
+ xfree_sized(options, sizeof(pm_options_t));
+}
/**
* Set the shebang callback option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) {
options->shebang_callback = shebang_callback;
options->shebang_callback_data = shebang_callback_data;
}
/**
+ * Get the filepath option on the given options struct.
+ */
+const pm_string_t *
+pm_options_filepath(const pm_options_t *options) {
+ return &options->filepath;
+}
+
+/**
* Set the filepath option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_filepath_set(pm_options_t *options, const char *filepath) {
pm_string_constant_init(&options->filepath, filepath, strlen(filepath));
}
@@ -20,7 +80,7 @@ pm_options_filepath_set(pm_options_t *options, const char *filepath) {
/**
* Set the encoding option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_encoding_set(pm_options_t *options, const char *encoding) {
pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
}
@@ -28,7 +88,7 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
/**
* Set the encoding_locked option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
options->encoding_locked = encoding_locked;
}
@@ -36,7 +96,7 @@ pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
/**
* Set the line option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_line_set(pm_options_t *options, int32_t line) {
options->line = line;
}
@@ -44,7 +104,7 @@ pm_options_line_set(pm_options_t *options, int32_t line) {
/**
* Set the frozen string literal option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) {
options->frozen_string_literal = frozen_string_literal ? PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED : PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
}
@@ -52,7 +112,7 @@ pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_l
/**
* Sets the command line option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_command_line_set(pm_options_t *options, uint8_t command_line) {
options->command_line = command_line;
}
@@ -60,7 +120,7 @@ pm_options_command_line_set(pm_options_t *options, uint8_t command_line) {
/**
* Checks if the given slice represents a number.
*/
-static inline bool
+static PRISM_INLINE bool
is_number(const char *string, size_t length) {
return pm_strspn_decimal_digit((const uint8_t *) string, (ptrdiff_t) length) == length;
}
@@ -70,7 +130,7 @@ is_number(const char *string, size_t length) {
* string. If the string contains an invalid option, this returns false.
* Otherwise, it returns true.
*/
-PRISM_EXPORTED_FUNCTION bool
+bool
pm_options_version_set(pm_options_t *options, const char *version, size_t length) {
if (version == NULL) {
options->version = PM_OPTIONS_VERSION_LATEST;
@@ -88,33 +148,43 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
return true;
}
- if (strncmp(version, "3.5", 3) == 0) {
- options->version = PM_OPTIONS_VERSION_LATEST;
+ if (strncmp(version, "3.5", 3) == 0 || strncmp(version, "4.0", 3) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+ return true;
+ }
+
+ if (strncmp(version, "4.1", 3) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
return true;
}
return false;
}
- if (length >= 4) {
- if (strncmp(version, "3.3.", 4) == 0 && is_number(version + 4, length - 4)) {
+ if (length >= 4 && is_number(version + 4, length - 4)) {
+ if (strncmp(version, "3.3.", 4) == 0) {
options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
return true;
}
- if (strncmp(version, "3.4.", 4) == 0 && is_number(version + 4, length - 4)) {
+ if (strncmp(version, "3.4.", 4) == 0) {
options->version = PM_OPTIONS_VERSION_CRUBY_3_4;
return true;
}
- if (strncmp(version, "3.5.", 4) == 0 && is_number(version + 4, length - 4)) {
- options->version = PM_OPTIONS_VERSION_LATEST;
+ if (strncmp(version, "3.5.", 4) == 0 || strncmp(version, "4.0.", 4) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_0;
+ return true;
+ }
+
+ if (strncmp(version, "4.1.", 4) == 0) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_4_1;
return true;
}
}
- if (length >= 6) {
- if (strncmp(version, "latest", 7) == 0) { // 7 to compare the \0 as well
+ if (length == 6) {
+ if (strncmp(version, "latest", 6) == 0) {
options->version = PM_OPTIONS_VERSION_LATEST;
return true;
}
@@ -124,9 +194,27 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length
}
/**
+ * Set the version option on the given options struct to the lowest version of
+ * Ruby that prism supports.
+ */
+void
+pm_options_version_set_lowest(pm_options_t *options) {
+ options->version = PM_OPTIONS_VERSION_CRUBY_3_3;
+}
+
+/**
+ * Set the version option on the given options struct to the highest version of
+ * Ruby that prism supports.
+ */
+void
+pm_options_version_set_highest(pm_options_t *options) {
+ options->version = PM_OPTIONS_VERSION_LATEST;
+}
+
+/**
* Set the main script option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_main_script_set(pm_options_t *options, bool main_script) {
options->main_script = main_script;
}
@@ -134,15 +222,23 @@ pm_options_main_script_set(pm_options_t *options, bool main_script) {
/**
* Set the partial script option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_partial_script_set(pm_options_t *options, bool partial_script) {
options->partial_script = partial_script;
}
/**
+ * Get the freeze option on the given options struct.
+ */
+bool
+pm_options_freeze(const pm_options_t *options) {
+ return options->freeze;
+}
+
+/**
* Set the freeze option on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_options_freeze_set(pm_options_t *options, bool freeze) {
options->freeze = freeze;
}
@@ -158,7 +254,7 @@ pm_options_freeze_set(pm_options_t *options, bool freeze) {
/**
* Allocate and zero out the scopes array on the given options struct.
*/
-PRISM_EXPORTED_FUNCTION bool
+bool
pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
options->scopes_count = scopes_count;
options->scopes = xcalloc(scopes_count, sizeof(pm_options_scope_t));
@@ -166,10 +262,20 @@ pm_options_scopes_init(pm_options_t *options, size_t scopes_count) {
}
/**
- * Return a pointer to the scope at the given index within the given options.
+ * Return a constant pointer to the scope at the given index within the given
+ * options.
*/
-PRISM_EXPORTED_FUNCTION const pm_options_scope_t *
-pm_options_scope_get(const pm_options_t *options, size_t index) {
+const pm_options_scope_t *
+pm_options_scope(const pm_options_t *options, size_t index) {
+ return &options->scopes[index];
+}
+
+/**
+ * Return a mutable pointer to the scope at the given index within the given
+ * options.
+ */
+pm_options_scope_t *
+pm_options_scope_mut(pm_options_t *options, size_t index) {
return &options->scopes[index];
}
@@ -177,49 +283,38 @@ pm_options_scope_get(const pm_options_t *options, size_t index) {
* Create a new options scope struct. This will hold a set of locals that are in
* scope surrounding the code that is being parsed.
*/
-PRISM_EXPORTED_FUNCTION bool
+void
pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) {
scope->locals_count = locals_count;
scope->locals = xcalloc(locals_count, sizeof(pm_string_t));
scope->forwarding = PM_OPTIONS_SCOPE_FORWARDING_NONE;
- return scope->locals != NULL;
+ if (scope->locals == NULL) abort();
}
/**
- * Return a pointer to the local at the given index within the given scope.
+ * Return a constant pointer to the local at the given index within the given
+ * scope.
*/
-PRISM_EXPORTED_FUNCTION const pm_string_t *
-pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) {
+const pm_string_t *
+pm_options_scope_local(const pm_options_scope_t *scope, size_t index) {
return &scope->locals[index];
}
/**
- * Set the forwarding option on the given scope struct.
+ * Return a mutable pointer to the local at the given index within the given
+ * scope.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) {
- scope->forwarding = forwarding;
+pm_string_t *
+pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) {
+ return &scope->locals[index];
}
/**
- * Free the internal memory associated with the options.
+ * Set the forwarding option on the given scope struct.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_options_free(pm_options_t *options) {
- pm_string_free(&options->filepath);
- pm_string_free(&options->encoding);
-
- for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
- pm_options_scope_t *scope = &options->scopes[scope_index];
-
- for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
- pm_string_free(&scope->locals[local_index]);
- }
-
- xfree(scope->locals);
- }
-
- xfree(options->scopes);
+void
+pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) {
+ scope->forwarding = forwarding;
}
/**
@@ -304,10 +399,7 @@ pm_options_read(pm_options_t *options, const char *data) {
data += 4;
pm_options_scope_t *scope = &options->scopes[scope_index];
- if (!pm_options_scope_init(scope, locals_count)) {
- pm_options_free(options);
- return;
- }
+ pm_options_scope_init(scope, locals_count);
uint8_t forwarding = (uint8_t) *data++;
pm_options_scope_forwarding_set(&options->scopes[scope_index], forwarding);
diff --git a/prism/options.h b/prism/options.h
index 2f64701b0c..0f5d7529b1 100644
--- a/prism/options.h
+++ b/prism/options.h
@@ -6,16 +6,27 @@
#ifndef PRISM_OPTIONS_H
#define PRISM_OPTIONS_H
-#include "prism/defines.h"
-#include "prism/util/pm_char.h"
-#include "prism/util/pm_string.h"
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/stringy.h"
#include <stdbool.h>
#include <stddef.h>
-#include <stdint.h>
/**
- * String literals should be made frozen.
+ * A scope of locals surrounding the code that is being parsed.
+ */
+typedef struct pm_options_scope_t pm_options_scope_t;
+
+/**
+ * The options that can be passed to the parser.
+ */
+typedef struct pm_options_t pm_options_t;
+
+/**
+ * String literals should not be frozen.
*/
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED ((int8_t) -1)
@@ -26,42 +37,25 @@
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET ((int8_t) 0)
/**
- * String literals should be made mutable.
+ * String literals should be made frozen.
*/
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED ((int8_t) 1)
-/**
- * A scope of locals surrounding the code that is being parsed.
- */
-typedef struct pm_options_scope {
- /** The number of locals in the scope. */
- size_t locals_count;
-
- /** The names of the locals in the scope. */
- pm_string_t *locals;
-
- /** Flags for the set of forwarding parameters in this scope. */
- uint8_t forwarding;
-} pm_options_scope_t;
-
/** The default value for parameters. */
static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_NONE = 0x0;
-/** When the scope is fowarding with the * parameter. */
+/** When the scope is forwarding with the * parameter. */
static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_POSITIONALS = 0x1;
-/** When the scope is fowarding with the ** parameter. */
+/** When the scope is forwarding with the ** parameter. */
static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_KEYWORDS = 0x2;
-/** When the scope is fowarding with the & parameter. */
+/** When the scope is forwarding with the & parameter. */
static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_BLOCK = 0x4;
-/** When the scope is fowarding with the ... parameter. */
+/** When the scope is forwarding with the ... parameter. */
static const uint8_t PM_OPTIONS_SCOPE_FORWARDING_ALL = 0x8;
-// Forward declaration needed by the callback typedef.
-struct pm_options;
-
/**
* The callback called when additional switches are found in a shebang comment
* that need to be processed by the runtime.
@@ -74,118 +68,7 @@ struct pm_options;
* @param shebang_callback_data Any additional data that should be passed along
* to the callback.
*/
-typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const uint8_t *source, size_t length, void *shebang_callback_data);
-
-/**
- * The version of Ruby syntax that we should be parsing with. This is used to
- * allow consumers to specify which behavior they want in case they need to
- * parse in the same way as a specific version of CRuby would have.
- */
-typedef enum {
- /** The current version of prism. */
- PM_OPTIONS_VERSION_LATEST = 0,
-
- /** The vendored version of prism in CRuby 3.3.x. */
- PM_OPTIONS_VERSION_CRUBY_3_3 = 1,
-
- /** The vendored version of prism in CRuby 3.4.x. */
- PM_OPTIONS_VERSION_CRUBY_3_4 = 2
-} pm_options_version_t;
-
-/**
- * The options that can be passed to the parser.
- */
-typedef struct pm_options {
- /**
- * The callback to call when additional switches are found in a shebang
- * comment.
- */
- pm_options_shebang_callback_t shebang_callback;
-
- /**
- * Any additional data that should be passed along to the shebang callback
- * if one was set.
- */
- void *shebang_callback_data;
-
- /** The name of the file that is currently being parsed. */
- pm_string_t filepath;
-
- /**
- * The line within the file that the parse starts on. This value is
- * 1-indexed.
- */
- int32_t line;
-
- /**
- * The name of the encoding that the source file is in. Note that this must
- * correspond to a name that can be found with Encoding.find in Ruby.
- */
- pm_string_t encoding;
-
- /**
- * The number of scopes surrounding the code that is being parsed.
- */
- size_t scopes_count;
-
- /**
- * The scopes surrounding the code that is being parsed. For most parses
- * this will be NULL, but for evals it will be the locals that are in scope
- * surrounding the eval. Scopes are ordered from the outermost scope to the
- * innermost one.
- */
- pm_options_scope_t *scopes;
-
- /**
- * The version of prism that we should be parsing with. This is used to
- * allow consumers to specify which behavior they want in case they need to
- * parse exactly as a specific version of CRuby.
- */
- pm_options_version_t version;
-
- /** A bitset of the various options that were set on the command line. */
- uint8_t command_line;
-
- /**
- * Whether or not the frozen string literal option has been set.
- * May be:
- * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
- * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
- * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
- */
- int8_t frozen_string_literal;
-
- /**
- * Whether or not the encoding magic comments should be respected. This is a
- * niche use-case where you want to parse a file with a specific encoding
- * but ignore any encoding magic comments at the top of the file.
- */
- bool encoding_locked;
-
- /**
- * When the file being parsed is the main script, the shebang will be
- * considered for command-line flags (or for implicit -x). The caller needs
- * to pass this information to the parser so that it can behave correctly.
- */
- bool main_script;
-
- /**
- * When the file being parsed is considered a "partial" script, jumps will
- * not be marked as errors if they are not contained within loops/blocks.
- * This is used in the case that you're parsing a script that you know will
- * be embedded inside another script later, but you do not have that context
- * yet. For example, when parsing an ERB template that will be evaluated
- * inside another script.
- */
- bool partial_script;
-
- /**
- * Whether or not the parser should freeze the nodes that it creates. This
- * makes it possible to have a deeply frozen AST that is safe to share
- * between concurrency primitives.
- */
- bool freeze;
-} pm_options_t;
+typedef void (*pm_options_shebang_callback_t)(pm_options_t *options, const uint8_t *source, size_t length, void *shebang_callback_data);
/**
* A bit representing whether or not the command line -a option was set. -a
@@ -220,11 +103,27 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_P = 0x10;
/**
* A bit representing whether or not the command line -x option was set. -x
- * searches the input file for a shebang that matches the current Ruby engine.
+ * searches the input file for a shebang that includes "ruby".
*/
static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
/**
+ * Allocate a new options struct. If the options struct cannot be allocated,
+ * this function aborts the process.
+ *
+ * @returns A new options struct with default values. It is the responsibility
+ * of the caller to free this struct using pm_options_free().
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_options_t * pm_options_new(void);
+
+/**
+ * Free both the held memory of the given options struct and the struct itself.
+ *
+ * @param options The options struct to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options) PRISM_NONNULL(1);
+
+/**
* Set the shebang callback option on the given options struct.
*
* @param options The options struct to set the shebang callback on.
@@ -232,7 +131,15 @@ static const uint8_t PM_OPTIONS_COMMAND_LINE_X = 0x20;
* @param shebang_callback_data Any additional data that should be passed along
* to the callback.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data);
+PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *options, pm_options_shebang_callback_t shebang_callback, void *shebang_callback_data) PRISM_NONNULL(1);
+
+/**
+ * Get the filepath option on the given options struct.
+ *
+ * @param options The options struct to get the filepath from.
+ * @returns The filepath.
+ */
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_filepath(const pm_options_t *options) PRISM_NONNULL(1);
/**
* Set the filepath option on the given options struct.
@@ -240,7 +147,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_shebang_callback_set(pm_options_t *optio
* @param options The options struct to set the filepath on.
* @param filepath The filepath to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath);
+PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath) PRISM_NONNULL(1);
/**
* Set the line option on the given options struct.
@@ -248,7 +155,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
* @param options The options struct to set the line on.
* @param line The line to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
+PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line) PRISM_NONNULL(1);
/**
* Set the encoding option on the given options struct.
@@ -256,7 +163,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
* @param options The options struct to set the encoding on.
* @param encoding The encoding to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding) PRISM_NONNULL(1);
/**
* Set the encoding_locked option on the given options struct.
@@ -264,7 +171,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, cons
* @param options The options struct to set the encoding_locked value on.
* @param encoding_locked The encoding_locked value to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
+PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) PRISM_NONNULL(1);
/**
* Set the frozen string literal option on the given options struct.
@@ -272,7 +179,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *option
* @param options The options struct to set the frozen string literal value on.
* @param frozen_string_literal The frozen string literal value to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal);
+PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) PRISM_NONNULL(1);
/**
* Sets the command line option on the given options struct.
@@ -280,7 +187,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *
* @param options The options struct to set the command line option on.
* @param command_line The command_line value to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line);
+PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options, uint8_t command_line) PRISM_NONNULL(1);
/**
* Set the version option on the given options struct by parsing the given
@@ -290,9 +197,25 @@ PRISM_EXPORTED_FUNCTION void pm_options_command_line_set(pm_options_t *options,
* @param options The options struct to set the version on.
* @param version The version to set.
* @param length The length of the version string.
- * @return Whether or not the version was parsed successfully.
+ * @returns Whether or not the version was parsed successfully.
*/
-PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length);
+PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const char *version, size_t length) PRISM_NONNULL(1);
+
+/**
+ * Set the version option on the given options struct to the lowest version of
+ * Ruby that prism supports.
+ *
+ * @param options The options struct to set the version on.
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_version_set_lowest(pm_options_t *options) PRISM_NONNULL(1);
+
+/**
+ * Set the version option on the given options struct to the highest version of
+ * Ruby that prism supports.
+ *
+ * @param options The options struct to set the version on.
+ */
+PRISM_EXPORTED_FUNCTION void pm_options_version_set_highest(pm_options_t *options) PRISM_NONNULL(1);
/**
* Set the main script option on the given options struct.
@@ -300,7 +223,7 @@ PRISM_EXPORTED_FUNCTION bool pm_options_version_set(pm_options_t *options, const
* @param options The options struct to set the main script value on.
* @param main_script The main script value to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script);
+PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, bool main_script) PRISM_NONNULL(1);
/**
* Set the partial script option on the given options struct.
@@ -308,7 +231,15 @@ PRISM_EXPORTED_FUNCTION void pm_options_main_script_set(pm_options_t *options, b
* @param options The options struct to set the partial script value on.
* @param partial_script The partial script value to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script);
+PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options, bool partial_script) PRISM_NONNULL(1);
+
+/**
+ * Get the freeze option on the given options struct.
+ *
+ * @param options The options struct to get the freeze value from.
+ * @returns The freeze value.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_freeze(const pm_options_t *options) PRISM_NONNULL(1);
/**
* Set the freeze option on the given options struct.
@@ -316,127 +247,73 @@ PRISM_EXPORTED_FUNCTION void pm_options_partial_script_set(pm_options_t *options
* @param options The options struct to set the freeze value on.
* @param freeze The freeze value to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze);
+PRISM_EXPORTED_FUNCTION void pm_options_freeze_set(pm_options_t *options, bool freeze) PRISM_NONNULL(1);
/**
* Allocate and zero out the scopes array on the given options struct.
*
* @param options The options struct to initialize the scopes array on.
* @param scopes_count The number of scopes to allocate.
- * @return Whether or not the scopes array was initialized successfully.
+ * @returns Whether or not the scopes array was initialized successfully.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count) PRISM_NONNULL(1);
+
+/**
+ * Return a constant pointer to the scope at the given index within the given
+ * options.
+ *
+ * @param options The options struct to get the scope from.
+ * @param index The index of the scope to get.
+ * @returns A constant pointer to the scope at the given index.
*/
-PRISM_EXPORTED_FUNCTION bool pm_options_scopes_init(pm_options_t *options, size_t scopes_count);
+PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope(const pm_options_t *options, size_t index) PRISM_NONNULL(1);
/**
- * Return a pointer to the scope at the given index within the given options.
+ * Return a mutable pointer to the scope at the given index within the given
+ * options.
*
* @param options The options struct to get the scope from.
* @param index The index of the scope to get.
- * @return A pointer to the scope at the given index.
+ * @returns A mutable pointer to the scope at the given index.
*/
-PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index);
+PRISM_EXPORTED_FUNCTION pm_options_scope_t * pm_options_scope_mut(pm_options_t *options, size_t index) PRISM_NONNULL(1);
/**
* Create a new options scope struct. This will hold a set of locals that are in
- * scope surrounding the code that is being parsed.
+ * scope surrounding the code that is being parsed. If the scope was unable to
+ * allocate its locals, this function will abort the process.
*
* @param scope The scope struct to initialize.
* @param locals_count The number of locals to allocate.
- * @return Whether or not the scope was initialized successfully.
*/
-PRISM_EXPORTED_FUNCTION bool pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count);
+PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) PRISM_NONNULL(1);
/**
- * Return a pointer to the local at the given index within the given scope.
+ * Return a constant pointer to the local at the given index within the given
+ * scope.
*
* @param scope The scope struct to get the local from.
* @param index The index of the local to get.
- * @return A pointer to the local at the given index.
- */
-PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index);
-
-/**
- * Set the forwarding option on the given scope struct.
- *
- * @param scope The scope struct to set the forwarding on.
- * @param forwarding The forwarding value to set.
+ * @returns A constant pointer to the local at the given index.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding);
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local(const pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1);
/**
- * Free the internal memory associated with the options.
+ * Return a mutable pointer to the local at the given index within the given
+ * scope.
*
- * @param options The options struct whose internal memory should be freed.
+ * @param scope The scope struct to get the local from.
+ * @param index The index of the local to get.
+ * @returns A mutable pointer to the local at the given index.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options);
+PRISM_EXPORTED_FUNCTION pm_string_t * pm_options_scope_local_mut(pm_options_scope_t *scope, size_t index) PRISM_NONNULL(1);
/**
- * Deserialize an options struct from the given binary string. This is used to
- * pass options to the parser from an FFI call so that consumers of the library
- * from an FFI perspective don't have to worry about the structure of our
- * options structs. Since the source of these calls will be from Ruby
- * implementation internals we assume it is from a trusted source.
- *
- * `data` is assumed to be a valid pointer pointing to well-formed data. The
- * layout of this data should be the same every time, and is described below:
- *
- * | # bytes | field |
- * | ------- | -------------------------- |
- * | `4` | the length of the filepath |
- * | ... | the filepath bytes |
- * | `4` | the line number |
- * | `4` | the length the encoding |
- * | ... | the encoding bytes |
- * | `1` | frozen string literal |
- * | `1` | -p command line option |
- * | `1` | -n command line option |
- * | `1` | -l command line option |
- * | `1` | -a command line option |
- * | `1` | the version |
- * | `1` | encoding locked |
- * | `1` | main script |
- * | `1` | partial script |
- * | `1` | freeze |
- * | `4` | the number of scopes |
- * | ... | the scopes |
- *
- * The version field is an enum, so it should be one of the following values:
- *
- * | value | version |
- * | ----- | ------------------------- |
- * | `0` | use the latest version of prism |
- * | `1` | use the version of prism that is vendored in CRuby 3.3.0 |
- *
- * Each scope is laid out as follows:
- *
- * | # bytes | field |
- * | ------- | -------------------------- |
- * | `4` | the number of locals |
- * | `1` | the forwarding flags |
- * | ... | the locals |
- *
- * Each local is laid out as follows:
- *
- * | # bytes | field |
- * | ------- | -------------------------- |
- * | `4` | the length of the local |
- * | ... | the local bytes |
- *
- * Some additional things to note about this layout:
- *
- * * The filepath can have a length of 0, in which case we'll consider it an
- * empty string.
- * * The line number should be 0-indexed.
- * * The encoding can have a length of 0, in which case we'll use the default
- * encoding (UTF-8). If it's not 0, it should correspond to a name of an
- * encoding that can be passed to `Encoding.find` in Ruby.
- * * The frozen string literal, encoding locked, main script, and partial script
- * fields are booleans, so their values should be either 0 or 1.
- * * The number of scopes can be 0.
+ * Set the forwarding option on the given scope struct.
*
- * @param options The options struct to deserialize into.
- * @param data The binary string to deserialize from.
+ * @param scope The scope struct to set the forwarding on.
+ * @param forwarding The forwarding value to set.
*/
-void pm_options_read(pm_options_t *options, const char *data);
+PRISM_EXPORTED_FUNCTION void pm_options_scope_forwarding_set(pm_options_scope_t *scope, uint8_t forwarding) PRISM_NONNULL(1);
#endif
diff --git a/prism/pack.c b/prism/pack.c
deleted file mode 100644
index 1388ca8a3b..0000000000
--- a/prism/pack.c
+++ /dev/null
@@ -1,509 +0,0 @@
-#include "prism/pack.h"
-
-// We optionally support parsing String#pack templates. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_PACK define.
-#ifdef PRISM_EXCLUDE_PACK
-
-void pm_pack_parse(void) {}
-
-#else
-
-#include <stdbool.h>
-#include <errno.h>
-
-static uintmax_t
-strtoumaxc(const char **format) {
- uintmax_t value = 0;
- while (**format >= '0' && **format <= '9') {
- if (value > UINTMAX_MAX / 10) {
- errno = ERANGE;
- }
- value = value * 10 + ((uintmax_t) (**format - '0'));
- (*format)++;
- }
- return value;
-}
-
-PRISM_EXPORTED_FUNCTION pm_pack_result
-pm_pack_parse(
- pm_pack_variant variant,
- const char **format,
- const char *format_end,
- pm_pack_type *type,
- pm_pack_signed *signed_type,
- pm_pack_endian *endian,
- pm_pack_size *size,
- pm_pack_length_type *length_type,
- uint64_t *length,
- pm_pack_encoding *encoding
-) {
- if (*encoding == PM_PACK_ENCODING_START) {
- *encoding = PM_PACK_ENCODING_US_ASCII;
- }
-
- if (*format == format_end) {
- *type = PM_PACK_END;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- *length_type = PM_PACK_LENGTH_NA;
- return PM_PACK_OK;
- }
-
- *length_type = PM_PACK_LENGTH_FIXED;
- *length = 1;
- bool length_changed_allowed = true;
-
- char directive = **format;
- (*format)++;
- switch (directive) {
- case ' ':
- case '\t':
- case '\n':
- case '\v':
- case '\f':
- case '\r':
- *type = PM_PACK_SPACE;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- *length_type = PM_PACK_LENGTH_NA;
- *length = 0;
- return PM_PACK_OK;
- case '#':
- while ((*format < format_end) && (**format != '\n')) {
- (*format)++;
- }
- *type = PM_PACK_COMMENT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- *length_type = PM_PACK_LENGTH_NA;
- *length = 0;
- return PM_PACK_OK;
- case 'C':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_AGNOSTIC_ENDIAN;
- *size = PM_PACK_SIZE_8;
- break;
- case 'S':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_16;
- break;
- case 'L':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_32;
- break;
- case 'Q':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_64;
- break;
- case 'J':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_P;
- break;
- case 'c':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_SIGNED;
- *endian = PM_PACK_AGNOSTIC_ENDIAN;
- *size = PM_PACK_SIZE_8;
- break;
- case 's':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_SIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_16;
- break;
- case 'l':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_SIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_32;
- break;
- case 'q':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_SIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_64;
- break;
- case 'j':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_SIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_P;
- break;
- case 'I':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_INT;
- break;
- case 'i':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_SIGNED;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_INT;
- break;
- case 'n':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_BIG_ENDIAN;
- *size = PM_PACK_SIZE_16;
- length_changed_allowed = false;
- break;
- case 'N':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_BIG_ENDIAN;
- *size = PM_PACK_SIZE_32;
- length_changed_allowed = false;
- break;
- case 'v':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_LITTLE_ENDIAN;
- *size = PM_PACK_SIZE_16;
- length_changed_allowed = false;
- break;
- case 'V':
- *type = PM_PACK_INTEGER;
- *signed_type = PM_PACK_UNSIGNED;
- *endian = PM_PACK_LITTLE_ENDIAN;
- *size = PM_PACK_SIZE_32;
- length_changed_allowed = false;
- break;
- case 'U':
- *type = PM_PACK_UTF8;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'w':
- *type = PM_PACK_BER;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'D':
- case 'd':
- *type = PM_PACK_FLOAT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_64;
- break;
- case 'F':
- case 'f':
- *type = PM_PACK_FLOAT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_NATIVE_ENDIAN;
- *size = PM_PACK_SIZE_32;
- break;
- case 'E':
- *type = PM_PACK_FLOAT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_LITTLE_ENDIAN;
- *size = PM_PACK_SIZE_64;
- break;
- case 'e':
- *type = PM_PACK_FLOAT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_LITTLE_ENDIAN;
- *size = PM_PACK_SIZE_32;
- break;
- case 'G':
- *type = PM_PACK_FLOAT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_BIG_ENDIAN;
- *size = PM_PACK_SIZE_64;
- break;
- case 'g':
- *type = PM_PACK_FLOAT;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_BIG_ENDIAN;
- *size = PM_PACK_SIZE_32;
- break;
- case 'A':
- *type = PM_PACK_STRING_SPACE_PADDED;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'a':
- *type = PM_PACK_STRING_NULL_PADDED;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'Z':
- *type = PM_PACK_STRING_NULL_TERMINATED;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'B':
- *type = PM_PACK_STRING_MSB;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'b':
- *type = PM_PACK_STRING_LSB;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'H':
- *type = PM_PACK_STRING_HEX_HIGH;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'h':
- *type = PM_PACK_STRING_HEX_LOW;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'u':
- *type = PM_PACK_STRING_UU;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'M':
- *type = PM_PACK_STRING_MIME;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'm':
- *type = PM_PACK_STRING_BASE64;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'P':
- *type = PM_PACK_STRING_FIXED;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'p':
- *type = PM_PACK_STRING_POINTER;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case '@':
- *type = PM_PACK_MOVE;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'X':
- *type = PM_PACK_BACK;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case 'x':
- *type = PM_PACK_NULL;
- *signed_type = PM_PACK_SIGNED_NA;
- *endian = PM_PACK_ENDIAN_NA;
- *size = PM_PACK_SIZE_NA;
- break;
- case '%':
- return PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE;
- default:
- return PM_PACK_ERROR_UNKNOWN_DIRECTIVE;
- }
-
- bool explicit_endian = false;
-
- while (*format < format_end) {
- switch (**format) {
- case '_':
- case '!':
- (*format)++;
- if (*type != PM_PACK_INTEGER || !length_changed_allowed) {
- return PM_PACK_ERROR_BANG_NOT_ALLOWED;
- }
- switch (*size) {
- case PM_PACK_SIZE_SHORT:
- case PM_PACK_SIZE_INT:
- case PM_PACK_SIZE_LONG:
- case PM_PACK_SIZE_LONG_LONG:
- break;
- case PM_PACK_SIZE_16:
- *size = PM_PACK_SIZE_SHORT;
- break;
- case PM_PACK_SIZE_32:
- *size = PM_PACK_SIZE_LONG;
- break;
- case PM_PACK_SIZE_64:
- *size = PM_PACK_SIZE_LONG_LONG;
- break;
- case PM_PACK_SIZE_P:
- break;
- default:
- return PM_PACK_ERROR_BANG_NOT_ALLOWED;
- }
- break;
- case '<':
- (*format)++;
- if (explicit_endian) {
- return PM_PACK_ERROR_DOUBLE_ENDIAN;
- }
- *endian = PM_PACK_LITTLE_ENDIAN;
- explicit_endian = true;
- break;
- case '>':
- (*format)++;
- if (explicit_endian) {
- return PM_PACK_ERROR_DOUBLE_ENDIAN;
- }
- *endian = PM_PACK_BIG_ENDIAN;
- explicit_endian = true;
- break;
- default:
- goto exit_modifier_loop;
- }
- }
-
-exit_modifier_loop:
-
- if (variant == PM_PACK_VARIANT_UNPACK && *type == PM_PACK_MOVE) {
- *length = 0;
- }
-
- if (*format < format_end) {
- if (**format == '*') {
- switch (*type) {
- case PM_PACK_NULL:
- case PM_PACK_BACK:
- switch (variant) {
- case PM_PACK_VARIANT_PACK:
- *length_type = PM_PACK_LENGTH_FIXED;
- break;
- case PM_PACK_VARIANT_UNPACK:
- *length_type = PM_PACK_LENGTH_MAX;
- break;
- }
- *length = 0;
- break;
-
- case PM_PACK_MOVE:
- switch (variant) {
- case PM_PACK_VARIANT_PACK:
- *length_type = PM_PACK_LENGTH_FIXED;
- break;
- case PM_PACK_VARIANT_UNPACK:
- *length_type = PM_PACK_LENGTH_RELATIVE;
- break;
- }
- *length = 0;
- break;
-
- case PM_PACK_STRING_UU:
- *length_type = PM_PACK_LENGTH_FIXED;
- *length = 0;
- break;
-
- case PM_PACK_STRING_FIXED:
- switch (variant) {
- case PM_PACK_VARIANT_PACK:
- *length_type = PM_PACK_LENGTH_FIXED;
- *length = 1;
- break;
- case PM_PACK_VARIANT_UNPACK:
- *length_type = PM_PACK_LENGTH_MAX;
- *length = 0;
- break;
- }
- break;
-
- case PM_PACK_STRING_MIME:
- case PM_PACK_STRING_BASE64:
- *length_type = PM_PACK_LENGTH_FIXED;
- *length = 1;
- break;
-
- default:
- *length_type = PM_PACK_LENGTH_MAX;
- *length = 0;
- break;
- }
-
- (*format)++;
- } else if (**format >= '0' && **format <= '9') {
- errno = 0;
- *length_type = PM_PACK_LENGTH_FIXED;
- #if UINTMAX_MAX < UINT64_MAX
- #error "prism's design assumes uintmax_t is at least as large as uint64_t"
- #endif
- uintmax_t length_max = strtoumaxc(format);
- if (errno || length_max > UINT64_MAX) {
- return PM_PACK_ERROR_LENGTH_TOO_BIG;
- }
- *length = (uint64_t) length_max;
- }
- }
-
- switch (*type) {
- case PM_PACK_UTF8:
- /* if encoding is US-ASCII, upgrade to UTF-8 */
- if (*encoding == PM_PACK_ENCODING_US_ASCII) {
- *encoding = PM_PACK_ENCODING_UTF_8;
- }
- break;
- case PM_PACK_STRING_MIME:
- case PM_PACK_STRING_BASE64:
- case PM_PACK_STRING_UU:
- /* keep US-ASCII (do nothing) */
- break;
- default:
- /* fall back to BINARY */
- *encoding = PM_PACK_ENCODING_ASCII_8BIT;
- break;
- }
-
- return PM_PACK_OK;
-}
-
-PRISM_EXPORTED_FUNCTION size_t
-pm_size_to_native(pm_pack_size size) {
- switch (size) {
- case PM_PACK_SIZE_SHORT:
- return sizeof(short);
- case PM_PACK_SIZE_INT:
- return sizeof(int);
- case PM_PACK_SIZE_LONG:
- return sizeof(long);
- case PM_PACK_SIZE_LONG_LONG:
- return sizeof(long long);
- case PM_PACK_SIZE_8:
- return 1;
- case PM_PACK_SIZE_16:
- return 2;
- case PM_PACK_SIZE_32:
- return 4;
- case PM_PACK_SIZE_64:
- return 8;
- case PM_PACK_SIZE_P:
- return sizeof(void *);
- default:
- return 0;
- }
-}
-
-#endif
diff --git a/prism/pack.h b/prism/pack.h
deleted file mode 100644
index 0b0b4b19cc..0000000000
--- a/prism/pack.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * @file pack.h
- *
- * A pack template string parser.
- */
-#ifndef PRISM_PACK_H
-#define PRISM_PACK_H
-
-#include "prism/defines.h"
-
-// We optionally support parsing String#pack templates. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_PACK define.
-#ifdef PRISM_EXCLUDE_PACK
-
-void pm_pack_parse(void);
-
-#else
-
-#include <stdint.h>
-#include <stdlib.h>
-
-/** The version of the pack template language that we are parsing. */
-typedef enum pm_pack_version {
- PM_PACK_VERSION_3_2_0
-} pm_pack_version;
-
-/** The type of pack template we are parsing. */
-typedef enum pm_pack_variant {
- PM_PACK_VARIANT_PACK,
- PM_PACK_VARIANT_UNPACK
-} pm_pack_variant;
-
-/** A directive within the pack template. */
-typedef enum pm_pack_type {
- PM_PACK_SPACE,
- PM_PACK_COMMENT,
- PM_PACK_INTEGER,
- PM_PACK_UTF8,
- PM_PACK_BER,
- PM_PACK_FLOAT,
- PM_PACK_STRING_SPACE_PADDED,
- PM_PACK_STRING_NULL_PADDED,
- PM_PACK_STRING_NULL_TERMINATED,
- PM_PACK_STRING_MSB,
- PM_PACK_STRING_LSB,
- PM_PACK_STRING_HEX_HIGH,
- PM_PACK_STRING_HEX_LOW,
- PM_PACK_STRING_UU,
- PM_PACK_STRING_MIME,
- PM_PACK_STRING_BASE64,
- PM_PACK_STRING_FIXED,
- PM_PACK_STRING_POINTER,
- PM_PACK_MOVE,
- PM_PACK_BACK,
- PM_PACK_NULL,
- PM_PACK_END
-} pm_pack_type;
-
-/** The signness of a pack directive. */
-typedef enum pm_pack_signed {
- PM_PACK_UNSIGNED,
- PM_PACK_SIGNED,
- PM_PACK_SIGNED_NA
-} pm_pack_signed;
-
-/** The endianness of a pack directive. */
-typedef enum pm_pack_endian {
- PM_PACK_AGNOSTIC_ENDIAN,
- PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
- PM_PACK_BIG_ENDIAN, // aka 'network', or 'N'
- PM_PACK_NATIVE_ENDIAN,
- PM_PACK_ENDIAN_NA
-} pm_pack_endian;
-
-/** The size of an integer pack directive. */
-typedef enum pm_pack_size {
- PM_PACK_SIZE_SHORT,
- PM_PACK_SIZE_INT,
- PM_PACK_SIZE_LONG,
- PM_PACK_SIZE_LONG_LONG,
- PM_PACK_SIZE_8,
- PM_PACK_SIZE_16,
- PM_PACK_SIZE_32,
- PM_PACK_SIZE_64,
- PM_PACK_SIZE_P,
- PM_PACK_SIZE_NA
-} pm_pack_size;
-
-/** The type of length of a pack directive. */
-typedef enum pm_pack_length_type {
- PM_PACK_LENGTH_FIXED,
- PM_PACK_LENGTH_MAX,
- PM_PACK_LENGTH_RELATIVE, // special case for unpack @*
- PM_PACK_LENGTH_NA
-} pm_pack_length_type;
-
-/** The type of encoding for a pack template string. */
-typedef enum pm_pack_encoding {
- PM_PACK_ENCODING_START,
- PM_PACK_ENCODING_ASCII_8BIT,
- PM_PACK_ENCODING_US_ASCII,
- PM_PACK_ENCODING_UTF_8
-} pm_pack_encoding;
-
-/** The result of parsing a pack template. */
-typedef enum pm_pack_result {
- PM_PACK_OK,
- PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
- PM_PACK_ERROR_UNKNOWN_DIRECTIVE,
- PM_PACK_ERROR_LENGTH_TOO_BIG,
- PM_PACK_ERROR_BANG_NOT_ALLOWED,
- PM_PACK_ERROR_DOUBLE_ENDIAN
-} pm_pack_result;
-
-/**
- * Parse a single directive from a pack or unpack format string.
- *
- * @param variant (in) pack or unpack
- * @param format (in, out) the start of the next directive to parse on calling,
- * and advanced beyond the parsed directive on return, or as much of it as
- * was consumed until an error was encountered
- * @param format_end (in) the end of the format string
- * @param type (out) the type of the directive
- * @param signed_type (out) whether the value is signed
- * @param endian (out) the endianness of the value
- * @param size (out) the size of the value
- * @param length_type (out) what kind of length is specified
- * @param length (out) the length of the directive
- * @param encoding (in, out) takes the current encoding of the string which
- * would result from parsing the whole format string, and returns a possibly
- * changed directive - the encoding should be `PM_PACK_ENCODING_START` when
- * pm_pack_parse is called for the first directive in a format string
- *
- * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
- * @note Consult Ruby documentation for the meaning of directives.
- */
-PRISM_EXPORTED_FUNCTION pm_pack_result
-pm_pack_parse(
- pm_pack_variant variant,
- const char **format,
- const char *format_end,
- pm_pack_type *type,
- pm_pack_signed *signed_type,
- pm_pack_endian *endian,
- pm_pack_size *size,
- pm_pack_length_type *length_type,
- uint64_t *length,
- pm_pack_encoding *encoding
-);
-
-/**
- * Prism abstracts sizes away from the native system - this converts an abstract
- * size to a native size.
- *
- * @param size The abstract size to convert.
- * @return The native size.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
-
-#endif
-
-#endif
diff --git a/prism/parser.c b/prism/parser.c
new file mode 100644
index 0000000000..415cd31984
--- /dev/null
+++ b/prism/parser.c
@@ -0,0 +1,302 @@
+#include "prism/internal/parser.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/magic_comments.h"
+
+#include <stdlib.h>
+
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ */
+void
+pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
+ parser->encoding_changed_callback = callback;
+}
+
+/**
+ * Register a callback that will be called whenever a token is lexed.
+ */
+void
+pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) {
+ parser->lex_callback.callback = callback;
+ parser->lex_callback.data = data;
+}
+
+/**
+ * Returns the opaque data that is passed to the lex callback when it is called.
+ */
+void *
+pm_parser_lex_callback_data(const pm_parser_t *parser) {
+ return parser->lex_callback.data;
+}
+
+/**
+ * Returns the raw pointer to the start of the source that is being parsed.
+ */
+const uint8_t *
+pm_parser_start(const pm_parser_t *parser) {
+ return parser->start;
+}
+
+/**
+ * Returns the raw pointer to the end of the source that is being parsed.
+ */
+const uint8_t *
+pm_parser_end(const pm_parser_t *parser) {
+ return parser->end;
+}
+
+/**
+ * Returns the line that the parser was considered to have started on.
+ *
+ * @param parser the parser whose start line we want to get
+ * @return the line that the parser was considered to have started on
+ */
+int32_t
+pm_parser_start_line(const pm_parser_t *parser) {
+ return parser->start_line;
+}
+
+/**
+ * Returns the name of the encoding that is being used to parse the source.
+ */
+const char *
+pm_parser_encoding_name(const pm_parser_t *parser) {
+ return parser->encoding->name;
+}
+
+/**
+ * Returns the width of the character at the given pointer in the encoding that
+ * is being used to parse the source.
+ */
+size_t
+pm_parser_encoding_char_width(const pm_parser_t *parser, const uint8_t *start, ptrdiff_t remaining) {
+ return parser->encoding->char_width(start, remaining);
+}
+
+/**
+ * Returns whether or not the parser is using the US-ASCII encoding.
+ */
+bool
+pm_parser_encoding_us_ascii(const pm_parser_t *parser) {
+ return parser->encoding == PM_ENCODING_US_ASCII_ENTRY;
+}
+
+/**
+ * Returns the filepath that is being used to parse the source.
+ */
+const pm_string_t *
+pm_parser_filepath(const pm_parser_t *parser) {
+ return &parser->filepath;
+}
+
+/**
+ * Find a constant in the parser's constant pool. Returns the id of the
+ * constant, or 0 if the constant is not found.
+ */
+pm_constant_id_t
+pm_parser_constant_find(const pm_parser_t *parser, const uint8_t *start, size_t length) {
+ return pm_constant_pool_find(&parser->constant_pool, start, length);
+}
+
+/**
+ * Returns the frozen string literal value of the parser.
+ */
+int8_t
+pm_parser_frozen_string_literal(const pm_parser_t *parser) {
+ return parser->frozen_string_literal;
+}
+
+/**
+ * Returns the line offsets that are associated with the given parser.
+ *
+ * @param parser the parser whose line offsets we want to get
+ * @return the line offsets that are associated with the given parser
+ */
+const pm_line_offset_list_t *
+pm_parser_line_offsets(const pm_parser_t *parser) {
+ return &parser->line_offsets;
+}
+
+/**
+ * Returns the location of the __DATA__ section that is associated with the
+ * given parser, if it exists.
+ */
+const pm_location_t *
+pm_parser_data_loc(const pm_parser_t *parser) {
+ return &parser->data_loc;
+}
+
+/**
+ * Returns whether the given parser is continuable, meaning that it could become
+ * valid if more input were appended, as opposed to being definitively invalid.
+ */
+bool
+pm_parser_continuable(const pm_parser_t *parser) {
+ return parser->continuable;
+}
+
+/**
+ * Returns the lex state of the parser. Note that this is an internal detail,
+ * and we are purposefully not returning an instance of the internal enum that
+ * we use to track this. This is only exposed because we need it for some very
+ * niche use cases. Most consumers should avoid this function.
+ */
+int
+pm_parser_lex_state(const pm_parser_t *parser) {
+ return (int) parser->lex_state;
+}
+
+/**
+ * Returns the location associated with the given comment.
+ */
+pm_location_t
+pm_comment_location(const pm_comment_t *comment) {
+ return comment->location;
+}
+
+/**
+ * Returns the type associated with the given comment.
+ */
+pm_comment_type_t
+pm_comment_type(const pm_comment_t *comment) {
+ return comment->type;
+}
+
+/**
+ * Returns the number of comments associated with the given parser.
+ */
+size_t
+pm_parser_comments_size(const pm_parser_t *parser) {
+ return parser->comment_list.size;
+}
+
+/**
+ * Iterates over the comments associated with the given parser and calls the
+ * given callback for each comment.
+ */
+void
+pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data) {
+ const pm_list_node_t *current = parser->comment_list.head;
+ while (current != NULL) {
+ const pm_comment_t *comment = (const pm_comment_t *) current;
+ callback(comment, data);
+ current = current->next;
+ }
+}
+
+/**
+ * Returns the location associated with the given magic comment key.
+ */
+pm_location_t
+pm_magic_comment_key(const pm_magic_comment_t *magic_comment) {
+ return magic_comment->key;
+}
+
+/**
+ * Returns the location associated with the given magic comment value.
+ */
+pm_location_t
+pm_magic_comment_value(const pm_magic_comment_t *magic_comment) {
+ return magic_comment->value;
+}
+
+/**
+ * Returns the number of magic comments associated with the given parser.
+ */
+size_t
+pm_parser_magic_comments_size(const pm_parser_t *parser) {
+ return parser->magic_comment_list.size;
+}
+
+/**
+ * Iterates over the magic comments associated with the given parser and calls
+ * the given callback for each magic comment.
+ */
+void
+pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data) {
+ const pm_list_node_t *current = parser->magic_comment_list.head;
+ while (current != NULL) {
+ const pm_magic_comment_t *magic_comment = (const pm_magic_comment_t *) current;
+ callback(magic_comment, data);
+ current = current->next;
+ }
+}
+
+/**
+ * Returns the number of errors associated with the given parser.
+ */
+size_t
+pm_parser_errors_size(const pm_parser_t *parser) {
+ return parser->error_list.size;
+}
+
+/**
+ * Returns the number of warnings associated with the given parser.
+ */
+size_t
+pm_parser_warnings_size(const pm_parser_t *parser) {
+ return parser->warning_list.size;
+}
+
+static inline void
+pm_parser_diagnostics_each(const pm_list_t *list, pm_diagnostic_callback_t callback, void *data) {
+ const pm_list_node_t *current = list->head;
+ while (current != NULL) {
+ const pm_diagnostic_t *diagnostic = (const pm_diagnostic_t *) current;
+ callback(diagnostic, data);
+ current = current->next;
+ }
+}
+
+/**
+ * Iterates over the errors associated with the given parser and calls the
+ * given callback for each error.
+ */
+void
+pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) {
+ pm_parser_diagnostics_each(&parser->error_list, callback, data);
+}
+
+/**
+ * Iterates over the warnings associated with the given parser and calls the
+ * given callback for each warning.
+ */
+void
+pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) {
+ pm_parser_diagnostics_each(&parser->warning_list, callback, data);
+}
+
+/**
+ * Returns the number of constants in the constant pool associated with the
+ * given parser.
+ */
+size_t
+pm_parser_constants_size(const pm_parser_t *parser) {
+ return parser->constant_pool.size;
+}
+
+/**
+ * Iterates over the constants in the constant pool associated with the given
+ * parser and calls the given callback for each constant.
+ */
+void
+pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) {
+ for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
+ const pm_constant_t *constant = &parser->constant_pool.constants[index];
+ callback(constant, data);
+ }
+}
+
+/**
+ * Returns a pointer to the constant at the given id in the constant pool
+ * associated with the given parser.
+ */
+const pm_constant_t *
+pm_parser_constant(const pm_parser_t *parser, pm_constant_id_t constant_id) {
+ return pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+}
diff --git a/prism/parser.h b/prism/parser.h
index 992729d655..2c8c4b3a7a 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -6,928 +6,343 @@
#ifndef PRISM_PARSER_H
#define PRISM_PARSER_H
-#include "prism/defines.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
#include "prism/ast.h"
-#include "prism/encoding.h"
+#include "prism/comments.h"
+#include "prism/diagnostic.h"
+#include "prism/line_offset_list.h"
+#include "prism/magic_comments.h"
#include "prism/options.h"
-#include "prism/static_literals.h"
-#include "prism/util/pm_constant_pool.h"
-#include "prism/util/pm_list.h"
-#include "prism/util/pm_newline_list.h"
-#include "prism/util/pm_string.h"
-
-#include <stdbool.h>
-
-/**
- * This enum provides various bits that represent different kinds of states that
- * the lexer can track. This is used to determine which kind of token to return
- * based on the context of the parser.
- */
-typedef enum {
- PM_LEX_STATE_BIT_BEG,
- PM_LEX_STATE_BIT_END,
- PM_LEX_STATE_BIT_ENDARG,
- PM_LEX_STATE_BIT_ENDFN,
- PM_LEX_STATE_BIT_ARG,
- PM_LEX_STATE_BIT_CMDARG,
- PM_LEX_STATE_BIT_MID,
- PM_LEX_STATE_BIT_FNAME,
- PM_LEX_STATE_BIT_DOT,
- PM_LEX_STATE_BIT_CLASS,
- PM_LEX_STATE_BIT_LABEL,
- PM_LEX_STATE_BIT_LABELED,
- PM_LEX_STATE_BIT_FITEM
-} pm_lex_state_bit_t;
-
-/**
- * This enum combines the various bits from the above enum into individual
- * values that represent the various states of the lexer.
- */
-typedef enum {
- PM_LEX_STATE_NONE = 0,
- PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
- PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
- PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
- PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
- PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
- PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
- PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
- PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
- PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
- PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
- PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
- PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
- PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
- PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
- PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
- PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
-} pm_lex_state_t;
-
-/**
- * The type of quote that a heredoc uses.
- */
-typedef enum {
- PM_HEREDOC_QUOTE_NONE,
- PM_HEREDOC_QUOTE_SINGLE = '\'',
- PM_HEREDOC_QUOTE_DOUBLE = '"',
- PM_HEREDOC_QUOTE_BACKTICK = '`',
-} pm_heredoc_quote_t;
-
-/**
- * The type of indentation that a heredoc uses.
- */
-typedef enum {
- PM_HEREDOC_INDENT_NONE,
- PM_HEREDOC_INDENT_DASH,
- PM_HEREDOC_INDENT_TILDE,
-} pm_heredoc_indent_t;
-
-/**
- * All of the information necessary to store to lexing a heredoc.
- */
-typedef struct {
- /** A pointer to the start of the heredoc identifier. */
- const uint8_t *ident_start;
-
- /** The length of the heredoc identifier. */
- size_t ident_length;
-
- /** The type of quote that the heredoc uses. */
- pm_heredoc_quote_t quote;
-
- /** The type of indentation that the heredoc uses. */
- pm_heredoc_indent_t indent;
-} pm_heredoc_lex_mode_t;
-
-/**
- * When lexing Ruby source, the lexer has a small amount of state to tell which
- * kind of token it is currently lexing. For example, when we find the start of
- * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
- * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
- * are found as part of a string.
- */
-typedef struct pm_lex_mode {
- /** The type of this lex mode. */
- enum {
- /** This state is used when any given token is being lexed. */
- PM_LEX_DEFAULT,
-
- /**
- * This state is used when we're lexing as normal but inside an embedded
- * expression of a string.
- */
- PM_LEX_EMBEXPR,
-
- /**
- * This state is used when we're lexing a variable that is embedded
- * directly inside of a string with the # shorthand.
- */
- PM_LEX_EMBVAR,
-
- /** This state is used when you are inside the content of a heredoc. */
- PM_LEX_HEREDOC,
-
- /**
- * This state is used when we are lexing a list of tokens, as in a %w
- * word list literal or a %i symbol list literal.
- */
- PM_LEX_LIST,
-
- /**
- * This state is used when a regular expression has been begun and we
- * are looking for the terminator.
- */
- PM_LEX_REGEXP,
-
- /**
- * This state is used when we are lexing a string or a string-like
- * token, as in string content with either quote or an xstring.
- */
- PM_LEX_STRING
- } mode;
-
- /** The data associated with this type of lex mode. */
- union {
- struct {
- /** This keeps track of the nesting level of the list. */
- size_t nesting;
-
- /** Whether or not interpolation is allowed in this list. */
- bool interpolation;
-
- /**
- * When lexing a list, it takes into account balancing the
- * terminator if the terminator is one of (), [], {}, or <>.
- */
- uint8_t incrementor;
-
- /** This is the terminator of the list literal. */
- uint8_t terminator;
-
- /**
- * This is the character set that should be used to delimit the
- * tokens within the list.
- */
- uint8_t breakpoints[11];
- } list;
-
- struct {
- /**
- * This keeps track of the nesting level of the regular expression.
- */
- size_t nesting;
-
- /**
- * When lexing a regular expression, it takes into account balancing
- * the terminator if the terminator is one of (), [], {}, or <>.
- */
- uint8_t incrementor;
-
- /** This is the terminator of the regular expression. */
- uint8_t terminator;
-
- /**
- * This is the character set that should be used to delimit the
- * tokens within the regular expression.
- */
- uint8_t breakpoints[7];
- } regexp;
-
- struct {
- /** This keeps track of the nesting level of the string. */
- size_t nesting;
-
- /** Whether or not interpolation is allowed in this string. */
- bool interpolation;
-
- /**
- * Whether or not at the end of the string we should allow a :,
- * which would indicate this was a dynamic symbol instead of a
- * string.
- */
- bool label_allowed;
-
- /**
- * When lexing a string, it takes into account balancing the
- * terminator if the terminator is one of (), [], {}, or <>.
- */
- uint8_t incrementor;
-
- /**
- * This is the terminator of the string. It is typically either a
- * single or double quote.
- */
- uint8_t terminator;
-
- /**
- * This is the character set that should be used to delimit the
- * tokens within the string.
- */
- uint8_t breakpoints[7];
- } string;
-
- struct {
- /**
- * All of the data necessary to lex a heredoc.
- */
- pm_heredoc_lex_mode_t base;
-
- /**
- * This is the pointer to the character where lexing should resume
- * once the heredoc has been completely processed.
- */
- const uint8_t *next_start;
-
- /**
- * This is used to track the amount of common whitespace on each
- * line so that we know how much to dedent each line in the case of
- * a tilde heredoc.
- */
- size_t *common_whitespace;
-
- /** True if the previous token ended with a line continuation. */
- bool line_continuation;
- } heredoc;
- } as;
-
- /** The previous lex state so that it knows how to pop. */
- struct pm_lex_mode *prev;
-} pm_lex_mode_t;
-
-/**
- * We pre-allocate a certain number of lex states in order to avoid having to
- * call malloc too many times while parsing. You really shouldn't need more than
- * this because you only really nest deeply when doing string interpolation.
- */
-#define PM_LEX_STACK_SIZE 4
/**
* The parser used to parse Ruby source.
*/
-typedef struct pm_parser pm_parser_t;
+typedef struct pm_parser_t pm_parser_t;
/**
- * While parsing, we keep track of a stack of contexts. This is helpful for
- * error recovery so that we can pop back to a previous context when we hit a
- * token that is understood by a parent context but not by the current context.
+ * Allocate and initialize a parser for the given source buffer and size.
+ *
+ * @param arena The arena to use for all AST-lifetime allocations. It is
+ * caller-owned and must outlive the parser.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param options The optional options to use when parsing. These options must
+ * live for the whole lifetime of this parser.
+ * @returns The initialized parser. It is the responsibility of the caller to
+ * free the parser with `pm_parser_free()`.
*/
-typedef enum {
- /** a null context, used for returning a value from a function */
- PM_CONTEXT_NONE = 0,
-
- /** a begin statement */
- PM_CONTEXT_BEGIN,
-
- /** an ensure statement with an explicit begin */
- PM_CONTEXT_BEGIN_ENSURE,
-
- /** a rescue else statement with an explicit begin */
- PM_CONTEXT_BEGIN_ELSE,
-
- /** a rescue statement with an explicit begin */
- PM_CONTEXT_BEGIN_RESCUE,
-
- /** expressions in block arguments using braces */
- PM_CONTEXT_BLOCK_BRACES,
-
- /** expressions in block arguments using do..end */
- PM_CONTEXT_BLOCK_KEYWORDS,
-
- /** an ensure statement within a do..end block */
- PM_CONTEXT_BLOCK_ENSURE,
-
- /** a rescue else statement within a do..end block */
- PM_CONTEXT_BLOCK_ELSE,
-
- /** a rescue statement within a do..end block */
- PM_CONTEXT_BLOCK_RESCUE,
-
- /** a case when statements */
- PM_CONTEXT_CASE_WHEN,
-
- /** a case in statements */
- PM_CONTEXT_CASE_IN,
-
- /** a class declaration */
- PM_CONTEXT_CLASS,
-
- /** an ensure statement within a class statement */
- PM_CONTEXT_CLASS_ENSURE,
-
- /** a rescue else statement within a class statement */
- PM_CONTEXT_CLASS_ELSE,
-
- /** a rescue statement within a class statement */
- PM_CONTEXT_CLASS_RESCUE,
-
- /** a method definition */
- PM_CONTEXT_DEF,
-
- /** an ensure statement within a method definition */
- PM_CONTEXT_DEF_ENSURE,
-
- /** a rescue else statement within a method definition */
- PM_CONTEXT_DEF_ELSE,
-
- /** a rescue statement within a method definition */
- PM_CONTEXT_DEF_RESCUE,
-
- /** a method definition's parameters */
- PM_CONTEXT_DEF_PARAMS,
-
- /** a defined? expression */
- PM_CONTEXT_DEFINED,
-
- /** a method definition's default parameter */
- PM_CONTEXT_DEFAULT_PARAMS,
-
- /** an else clause */
- PM_CONTEXT_ELSE,
-
- /** an elsif clause */
- PM_CONTEXT_ELSIF,
-
- /** an interpolated expression */
- PM_CONTEXT_EMBEXPR,
-
- /** a for loop */
- PM_CONTEXT_FOR,
-
- /** a for loop's index */
- PM_CONTEXT_FOR_INDEX,
-
- /** an if statement */
- PM_CONTEXT_IF,
-
- /** a lambda expression with braces */
- PM_CONTEXT_LAMBDA_BRACES,
-
- /** a lambda expression with do..end */
- PM_CONTEXT_LAMBDA_DO_END,
-
- /** an ensure statement within a lambda expression */
- PM_CONTEXT_LAMBDA_ENSURE,
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NONNULL(1);
- /** a rescue else statement within a lambda expression */
- PM_CONTEXT_LAMBDA_ELSE,
-
- /** a rescue statement within a lambda expression */
- PM_CONTEXT_LAMBDA_RESCUE,
+/**
+ * Free both the memory held by the given parser and the parser itself.
+ *
+ * @param parser The parser to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1);
- /** the predicate clause of a loop statement */
- PM_CONTEXT_LOOP_PREDICATE,
+/**
+ * When the encoding that is being used to parse the source is changed by prism,
+ * we provide the ability here to call out to a user-defined function.
+ */
+typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
- /** the top level context */
- PM_CONTEXT_MAIN,
+/**
+ * This is the callback that is called when a token is lexed. It is passed
+ * the parser, the token that was lexed, and the opaque data pointer.
+ */
+typedef void (*pm_lex_callback_t)(pm_parser_t *parser, pm_token_t *token, void *data);
- /** a module declaration */
- PM_CONTEXT_MODULE,
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback) PRISM_NONNULL(1);
- /** an ensure statement within a module statement */
- PM_CONTEXT_MODULE_ENSURE,
+/**
+ * Register a callback that will be called whenever a token is lexed.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ * @param data The opaque data to pass to the callback when it is called.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_lex_callback_set(pm_parser_t *parser, pm_lex_callback_t callback, void *data) PRISM_NONNULL(1);
- /** a rescue else statement within a module statement */
- PM_CONTEXT_MODULE_ELSE,
+/**
+ * Returns the opaque data that is passed to the lex callback when it is called.
+ *
+ * @param parser The parser whose lex callback data we want to get.
+ * @returns The opaque data that is passed to the lex callback when it is called.
+ */
+PRISM_EXPORTED_FUNCTION void * pm_parser_lex_callback_data(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a rescue statement within a module statement */
- PM_CONTEXT_MODULE_RESCUE,
+/**
+ * Returns the raw pointer to the start of the source that is being parsed.
+ *
+ * @param parser the parser whose start pointer we want to get
+ * @returns the raw pointer to the start of the source that is being parsed
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_start(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a multiple target expression */
- PM_CONTEXT_MULTI_TARGET,
+/**
+ * Returns the raw pointer to the end of the source that is being parsed.
+ *
+ * @param parser the parser whose end pointer we want to get
+ * @returns the raw pointer to the end of the source that is being parsed
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_parser_end(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a parenthesized expression */
- PM_CONTEXT_PARENS,
+/**
+ * Returns the line that the parser was considered to have started on.
+ *
+ * @param parser the parser whose start line we want to get
+ * @returns the line that the parser was considered to have started on
+ */
+PRISM_EXPORTED_FUNCTION int32_t pm_parser_start_line(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** an END block */
- PM_CONTEXT_POSTEXE,
+/**
+ * Returns the name of the encoding that is being used to parse the source.
+ *
+ * @param parser the parser whose encoding name we want to get
+ * @returns the name of the encoding that is being used to parse the source
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_parser_encoding_name(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a predicate inside an if/elsif/unless statement */
- PM_CONTEXT_PREDICATE,
+/**
+ * Returns the width of the character at the given pointer in the encoding that
+ * is being used to parse the source.
+ *
+ * @param parser the parser whose encoding we want to use
+ * @param start a pointer to the start of the character
+ * @param remaining the number of bytes remaining in the source
+ * @returns the width of the character in bytes
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_encoding_char_width(const pm_parser_t *parser, const uint8_t *start, ptrdiff_t remaining) PRISM_NONNULL(1, 2);
- /** a BEGIN block */
- PM_CONTEXT_PREEXE,
+/**
+ * Returns whether or not the parser is using the US-ASCII encoding.
+ *
+ * @param parser the parser to check
+ * @returns true if the parser is using US-ASCII encoding, false otherwise
+ */
+PRISM_EXPORTED_FUNCTION bool pm_parser_encoding_us_ascii(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a modifier rescue clause */
- PM_CONTEXT_RESCUE_MODIFIER,
+/**
+ * Returns the filepath that is being used to parse the source.
+ *
+ * @param parser the parser whose filepath we want to get
+ * @returns a pointer to the filepath string
+ */
+PRISM_EXPORTED_FUNCTION const pm_string_t * pm_parser_filepath(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a singleton class definition */
- PM_CONTEXT_SCLASS,
+/**
+ * Find a constant in the parser's constant pool. Returns the id of the
+ * constant, or 0 if the constant is not found.
+ *
+ * @param parser the parser whose constant pool we want to search
+ * @param start a pointer to the start of the string to search for
+ * @param length the length of the string to search for
+ * @returns the id of the constant, or 0 if the constant is not found
+ */
+PRISM_EXPORTED_FUNCTION pm_constant_id_t pm_parser_constant_find(const pm_parser_t *parser, const uint8_t *start, size_t length) PRISM_NONNULL(1, 2);
- /** an ensure statement with a singleton class */
- PM_CONTEXT_SCLASS_ENSURE,
+/**
+ * Returns the frozen string literal value of the parser, as determined by the
+ * frozen_string_literal magic comment or the option set on the parser.
+ *
+ * @param parser the parser whose frozen string literal value we want to get
+ * @returns -1 if disabled, 0 if unset, 1 if enabled
+ */
+PRISM_EXPORTED_FUNCTION int8_t pm_parser_frozen_string_literal(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a rescue else statement with a singleton class */
- PM_CONTEXT_SCLASS_ELSE,
+/**
+ * Returns the line offsets that are associated with the given parser.
+ *
+ * @param parser the parser whose line offsets we want to get
+ * @returns the line offsets that are associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION const pm_line_offset_list_t * pm_parser_line_offsets(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a rescue statement with a singleton class */
- PM_CONTEXT_SCLASS_RESCUE,
+/**
+ * Returns the location of the __END__ marker and the content that follows it
+ * (the data loaded into the DATA constant) for the given parser.
+ *
+ * @param parser the parser whose data location we want to get
+ * @returns the location of the __END__ marker and trailing content for the
+ * given parser. If it is unset, then the length will be set to 0.
+ */
+PRISM_EXPORTED_FUNCTION const pm_location_t * pm_parser_data_loc(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a ternary expression */
- PM_CONTEXT_TERNARY,
+/**
+ * Returns whether the given parser is continuable, meaning that it could become
+ * valid if more input were appended, as opposed to being definitively invalid.
+ *
+ * @param parser the parser whose continuable status we want to get
+ * @returns whether the given parser is continuable
+ */
+PRISM_EXPORTED_FUNCTION bool pm_parser_continuable(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** an unless statement */
- PM_CONTEXT_UNLESS,
+/**
+ * Returns the lex state of the parser. Note that this is an internal detail,
+ * and we are purposefully not returning an instance of the internal enum that
+ * we use to track this. This is only exposed because we need it for some very
+ * niche use cases. Most consumers should avoid this function.
+ *
+ * @param parser the parser whose lex state we want to get
+ * @returns the lex state of the parser
+ */
+PRISM_EXPORTED_FUNCTION int pm_parser_lex_state(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** an until statement */
- PM_CONTEXT_UNTIL,
+/**
+ * Returns the number of comments associated with the given parser.
+ *
+ * @param parser the parser whose comments we want to get the size of
+ * @returns the number of comments associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** a while statement */
- PM_CONTEXT_WHILE,
-} pm_context_t;
+/**
+ * A callback function that can be used to process comments found while parsing.
+ */
+typedef void (*pm_comment_callback_t)(const pm_comment_t *comment, void *data);
-/** This is a node in a linked list of contexts. */
-typedef struct pm_context_node {
- /** The context that this node represents. */
- pm_context_t context;
+/**
+ * Iterates over the comments associated with the given parser and calls the
+ * given callback for each comment.
+ *
+ * @param parser the parser whose comments we want to iterate over
+ * @param callback the callback function to call for each comment. This function
+ * will be passed a pointer to the comment and the data parameter passed to
+ * this function.
+ * @param data the data to pass to the callback function for each comment. This
+ * can be NULL if no data needs to be passed to the callback function.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_comments_each(const pm_parser_t *parser, pm_comment_callback_t callback, void *data) PRISM_NONNULL(1);
- /** A pointer to the previous context in the linked list. */
- struct pm_context_node *prev;
-} pm_context_node_t;
+/**
+ * Returns the number of magic comments associated with the given parser.
+ *
+ * @param parser the parser whose magic comments we want to get the size of
+ * @returns the number of magic comments associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_magic_comments_size(const pm_parser_t *parser) PRISM_NONNULL(1);
-/** This is the type of a comment that we've found while parsing. */
-typedef enum {
- PM_COMMENT_INLINE,
- PM_COMMENT_EMBDOC
-} pm_comment_type_t;
+/**
+ * A callback function that can be used to process magic comments found while parsing.
+ */
+typedef void (*pm_magic_comment_callback_t)(const pm_magic_comment_t *magic_comment, void *data);
/**
- * This is a node in the linked list of comments that we've found while parsing.
+ * Iterates over the magic comments associated with the given parser and calls the
+ * given callback for each magic comment.
*
- * @extends pm_list_node_t
+ * @param parser the parser whose magic comments we want to iterate over
+ * @param callback the callback function to call for each magic comment. This
+ * function will be passed a pointer to the magic comment and the data
+ * parameter passed to this function.
+ * @param data the data to pass to the callback function for each magic comment.
+ * This can be NULL if no data needs to be passed to the callback function.
*/
-typedef struct pm_comment {
- /** The embedded base node. */
- pm_list_node_t node;
+PRISM_EXPORTED_FUNCTION void pm_parser_magic_comments_each(const pm_parser_t *parser, pm_magic_comment_callback_t callback, void *data) PRISM_NONNULL(1);
- /** The location of the comment in the source. */
- pm_location_t location;
+/**
+ * Returns the number of errors associated with the given parser.
+ *
+ * @param parser the parser whose errors we want to get the size of
+ * @returns the number of errors associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_errors_size(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** The type of comment that we've found. */
- pm_comment_type_t type;
-} pm_comment_t;
+/**
+ * Returns the number of warnings associated with the given parser.
+ *
+ * @param parser the parser whose warnings we want to get the size of
+ * @returns the number of warnings associated with the given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_warnings_size(const pm_parser_t *parser) PRISM_NONNULL(1);
/**
- * This is a node in the linked list of magic comments that we've found while
+ * A callback function that can be used to process diagnostics found while
* parsing.
+ */
+typedef void (*pm_diagnostic_callback_t)(const pm_diagnostic_t *diagnostic, void *data);
+
+/**
+ * Iterates over the errors associated with the given parser and calls the
+ * given callback for each error.
*
- * @extends pm_list_node_t
+ * @param parser the parser whose errors we want to iterate over
+ * @param callback the callback function to call for each error. This function
+ * will be passed a pointer to the error and the data parameter passed to
+ * this function.
+ * @param data the data to pass to the callback function for each error. This
+ * can be NULL if no data needs to be passed to the callback function.
*/
-typedef struct {
- /** The embedded base node. */
- pm_list_node_t node;
+PRISM_EXPORTED_FUNCTION void pm_parser_errors_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) PRISM_NONNULL(1);
- /** A pointer to the start of the key in the source. */
- const uint8_t *key_start;
+/**
+ * Iterates over the warnings associated with the given parser and calls the
+ * given callback for each warning.
+ *
+ * @param parser the parser whose warnings we want to iterate over
+ * @param callback the callback function to call for each warning. This function
+ * will be passed a pointer to the warning and the data parameter passed to
+ * this function.
+ * @param data the data to pass to the callback function for each warning. This
+ * can be NULL if no data needs to be passed to the callback function.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_warnings_each(const pm_parser_t *parser, pm_diagnostic_callback_t callback, void *data) PRISM_NONNULL(1);
- /** A pointer to the start of the value in the source. */
- const uint8_t *value_start;
+/**
+ * Returns the number of constants in the constant pool associated with the
+ * given parser.
+ *
+ * @param parser the parser whose constant pool constants we want to get the
+ * size of
+ * @returns the number of constants in the constant pool associated with the
+ * given parser
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_parser_constants_size(const pm_parser_t *parser) PRISM_NONNULL(1);
- /** The length of the key in the source. */
- uint32_t key_length;
+/**
+ * A callback function that can be used to process constants found while
+ * parsing.
+ */
+typedef void (*pm_constant_callback_t)(const pm_constant_t *constant, void *data);
- /** The length of the value in the source. */
- uint32_t value_length;
-} pm_magic_comment_t;
+/**
+ * Iterates over the constants in the constant pool associated with the given
+ * parser and calls the given callback for each constant.
+ *
+ * @param parser the parser whose constants we want to iterate over
+ * @param callback the callback function to call for each constant. This function
+ * will be passed a pointer to the constant and the data parameter passed to
+ * this function.
+ * @param data the data to pass to the callback function for each constant. This
+ * can be NULL if no data needs to be passed to the callback function.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_constants_each(const pm_parser_t *parser, pm_constant_callback_t callback, void *data) PRISM_NONNULL(1);
/**
- * When the encoding that is being used to parse the source is changed by prism,
- * we provide the ability here to call out to a user-defined function.
+ * Returns a pointer to the constant at the given id in the constant pool
+ * associated with the given parser.
+ *
+ * @param parser the parser whose constant pool we want to look up from
+ * @param constant_id the id of the constant to look up (1-based)
+ * @returns a pointer to the constant at the given id
*/
-typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
+PRISM_EXPORTED_FUNCTION const pm_constant_t * pm_parser_constant(const pm_parser_t *parser, pm_constant_id_t constant_id) PRISM_NONNULL(1);
/**
- * When you are lexing through a file, the lexer needs all of the information
- * that the parser additionally provides (for example, the local table). So if
- * you want to properly lex Ruby, you need to actually lex it in the context of
- * the parser. In order to provide this functionality, we optionally allow a
- * struct to be attached to the parser that calls back out to a user-provided
- * callback when each token is lexed.
- */
-typedef struct {
- /**
- * This opaque pointer is used to provide whatever information the user
- * deemed necessary to the callback. In our case we use it to pass the array
- * that the tokens get appended into.
- */
- void *data;
-
- /**
- * This is the callback that is called when a token is lexed. It is passed
- * the opaque data pointer, the parser, and the token that was lexed.
- */
- void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
-} pm_lex_callback_t;
-
-/** The type of shareable constant value that can be set. */
-typedef uint8_t pm_shareable_constant_value_t;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_NONE = 0x0;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_LITERAL = PM_SHAREABLE_CONSTANT_NODE_FLAGS_LITERAL;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_EVERYTHING;
-static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = PM_SHAREABLE_CONSTANT_NODE_FLAGS_EXPERIMENTAL_COPY;
-
-/**
- * This tracks an individual local variable in a certain lexical context, as
- * well as the number of times is it read.
- */
-typedef struct {
- /** The name of the local variable. */
- pm_constant_id_t name;
-
- /** The location of the local variable in the source. */
- pm_location_t location;
-
- /** The index of the local variable in the local table. */
- uint32_t index;
-
- /** The number of times the local variable is read. */
- uint32_t reads;
-
- /** The hash of the local variable. */
- uint32_t hash;
-} pm_local_t;
-
-/**
- * This is a set of local variables in a certain lexical context (method, class,
- * module, etc.). We need to track how many times these variables are read in
- * order to warn if they only get written.
- */
-typedef struct pm_locals {
- /** The number of local variables in the set. */
- uint32_t size;
-
- /** The capacity of the local variables set. */
- uint32_t capacity;
-
- /** The nullable allocated memory for the local variables in the set. */
- pm_local_t *locals;
-} pm_locals_t;
-
-/** The flags about scope parameters that can be set. */
-typedef uint8_t pm_scope_parameters_t;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NONE = 0x0;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS = 0x1;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS = 0x2;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_BLOCK = 0x4;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_FORWARDING_ALL = 0x8;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED = 0x10;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_INNER = 0x20;
-static const pm_scope_parameters_t PM_SCOPE_PARAMETERS_NUMBERED_FOUND = 0x40;
-
-/**
- * This struct represents a node in a linked list of scopes. Some scopes can see
- * into their parent scopes, while others cannot.
- */
-typedef struct pm_scope {
- /** A pointer to the previous scope in the linked list. */
- struct pm_scope *previous;
-
- /** The IDs of the locals in the given scope. */
- pm_locals_t locals;
-
- /**
- * This is a list of the implicit parameters contained within the block.
- * These will be processed after the block is parsed to determine the kind
- * of parameters node that should be used and to check if any errors need to
- * be added.
- */
- pm_node_list_t implicit_parameters;
-
- /**
- * This is a bitfield that indicates the parameters that are being used in
- * this scope. It is a combination of the PM_SCOPE_PARAMETERS_* constants.
- * There are three different kinds of parameters that can be used in a
- * scope:
- *
- * - Ordinary parameters (e.g., def foo(bar); end)
- * - Numbered parameters (e.g., def foo; _1; end)
- * - The it parameter (e.g., def foo; it; end)
- *
- * If ordinary parameters are being used, then certain parameters can be
- * forwarded to another method/structure. Those are indicated by four
- * additional bits in the params field. For example, some combinations of:
- *
- * - def foo(*); end
- * - def foo(**); end
- * - def foo(&); end
- * - def foo(...); end
- */
- pm_scope_parameters_t parameters;
-
- /**
- * The current state of constant shareability for this scope. This is
- * changed by magic shareable_constant_value comments.
- */
- pm_shareable_constant_value_t shareable_constant;
-
- /**
- * A boolean indicating whether or not this scope can see into its parent.
- * If closed is true, then the scope cannot see into its parent.
- */
- bool closed;
-} pm_scope_t;
-
-/**
- * A struct that represents a stack of boolean values.
- */
-typedef uint32_t pm_state_stack_t;
-
-/**
- * This struct represents the overall parser. It contains a reference to the
- * source file, as well as pointers that indicate where in the source it's
- * currently parsing. It also contains the most recent and current token that
- * it's considering.
- */
-struct pm_parser {
- /**
- * The next node identifier that will be assigned. This is a unique
- * identifier used to track nodes such that the syntax tree can be dropped
- * but the node can be found through another parse.
- */
- uint32_t node_id;
-
- /** The current state of the lexer. */
- pm_lex_state_t lex_state;
-
- /** Tracks the current nesting of (), [], and {}. */
- int enclosure_nesting;
-
- /**
- * Used to temporarily track the nesting of enclosures to determine if a {
- * is the beginning of a lambda following the parameters of a lambda.
- */
- int lambda_enclosure_nesting;
-
- /**
- * Used to track the nesting of braces to ensure we get the correct value
- * when we are interpolating blocks with braces.
- */
- int brace_nesting;
-
- /**
- * The stack used to determine if a do keyword belongs to the predicate of a
- * while, until, or for loop.
- */
- pm_state_stack_t do_loop_stack;
-
- /**
- * The stack used to determine if a do keyword belongs to the beginning of a
- * block.
- */
- pm_state_stack_t accepts_block_stack;
-
- /** A stack of lex modes. */
- struct {
- /** The current mode of the lexer. */
- pm_lex_mode_t *current;
-
- /** The stack of lexer modes. */
- pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
-
- /** The current index into the lexer mode stack. */
- size_t index;
- } lex_modes;
-
- /** The pointer to the start of the source. */
- const uint8_t *start;
-
- /** The pointer to the end of the source. */
- const uint8_t *end;
-
- /** The previous token we were considering. */
- pm_token_t previous;
-
- /** The current token we're considering. */
- pm_token_t current;
-
- /**
- * This is a special field set on the parser when we need the parser to jump
- * to a specific location when lexing the next token, as opposed to just
- * using the end of the previous token. Normally this is NULL.
- */
- const uint8_t *next_start;
-
- /**
- * This field indicates the end of a heredoc whose identifier was found on
- * the current line. If another heredoc is found on the same line, then this
- * will be moved forward to the end of that heredoc. If no heredocs are
- * found on a line then this is NULL.
- */
- const uint8_t *heredoc_end;
-
- /** The list of comments that have been found while parsing. */
- pm_list_t comment_list;
-
- /** The list of magic comments that have been found while parsing. */
- pm_list_t magic_comment_list;
-
- /**
- * An optional location that represents the location of the __END__ marker
- * and the rest of the content of the file. This content is loaded into the
- * DATA constant when the file being parsed is the main file being executed.
- */
- pm_location_t data_loc;
-
- /** The list of warnings that have been found while parsing. */
- pm_list_t warning_list;
-
- /** The list of errors that have been found while parsing. */
- pm_list_t error_list;
-
- /** The current local scope. */
- pm_scope_t *current_scope;
-
- /** The current parsing context. */
- pm_context_node_t *current_context;
-
- /**
- * The hash keys for the hash that is currently being parsed. This is not
- * usually necessary because it can pass it down the various call chains,
- * but in the event that you're parsing a hash that is being directly
- * pushed into another hash with **, we need to share the hash keys so that
- * we can warn for the nested hash as well.
- */
- pm_static_literals_t *current_hash_keys;
-
- /**
- * The encoding functions for the current file is attached to the parser as
- * it's parsing so that it can change with a magic comment.
- */
- const pm_encoding_t *encoding;
-
- /**
- * When the encoding that is being used to parse the source is changed by
- * prism, we provide the ability here to call out to a user-defined
- * function.
- */
- pm_encoding_changed_callback_t encoding_changed_callback;
-
- /**
- * This pointer indicates where a comment must start if it is to be
- * considered an encoding comment.
- */
- const uint8_t *encoding_comment_start;
-
- /**
- * This is an optional callback that can be attached to the parser that will
- * be called whenever a new token is lexed by the parser.
- */
- pm_lex_callback_t *lex_callback;
-
- /**
- * This is the path of the file being parsed. We use the filepath when
- * constructing SourceFileNodes.
- */
- pm_string_t filepath;
-
- /**
- * This constant pool keeps all of the constants defined throughout the file
- * so that we can reference them later.
- */
- pm_constant_pool_t constant_pool;
-
- /** This is the list of newline offsets in the source file. */
- pm_newline_list_t newline_list;
-
- /**
- * We want to add a flag to integer nodes that indicates their base. We only
- * want to parse these once, but we don't have space on the token itself to
- * communicate this information. So we store it here and pass it through
- * when we find tokens that we need it for.
- */
- pm_node_flags_t integer_base;
-
- /**
- * This string is used to pass information from the lexer to the parser. It
- * is particularly necessary because of escape sequences.
- */
- pm_string_t current_string;
-
- /**
- * The line number at the start of the parse. This will be used to offset
- * the line numbers of all of the locations.
- */
- int32_t start_line;
-
- /**
- * When a string-like expression is being lexed, any byte or escape sequence
- * that resolves to a value whose top bit is set (i.e., >= 0x80) will
- * explicitly set the encoding to the same encoding as the source.
- * Alternatively, if a unicode escape sequence is used (e.g., \\u{80}) that
- * resolves to a value whose top bit is set, then the encoding will be
- * explicitly set to UTF-8.
- *
- * The _next_ time this happens, if the encoding that is about to become the
- * explicitly set encoding does not match the previously set explicit
- * encoding, a mixed encoding error will be emitted.
- *
- * When the expression is finished being lexed, the explicit encoding
- * controls the encoding of the expression. For the most part this means
- * that the expression will either be encoded in the source encoding or
- * UTF-8. This holds for all encodings except US-ASCII. If the source is
- * US-ASCII and an explicit encoding was set that was _not_ UTF-8, then the
- * expression will be encoded as ASCII-8BIT.
- *
- * Note that if the expression is a list, different elements within the same
- * list can have different encodings, so this will get reset between each
- * element. Furthermore all of this only applies to lists that support
- * interpolation, because otherwise escapes that could change the encoding
- * are ignored.
- *
- * At first glance, it may make more sense for this to live on the lexer
- * mode, but we need it here to communicate back to the parser for character
- * literals that do not push a new lexer mode.
- */
- const pm_encoding_t *explicit_encoding;
-
- /**
- * When parsing block exits (e.g., break, next, redo), we need to validate
- * that they are in correct contexts. For the most part we can do this by
- * looking at our parent contexts. However, modifier while and until
- * expressions can change that context to make block exits valid. In these
- * cases, we need to keep track of the block exits and then validate them
- * after the expression has been parsed.
- *
- * We use a pointer here because we don't want to keep a whole list attached
- * since this will only be used in the context of begin/end expressions.
- */
- pm_node_list_t *current_block_exits;
-
- /** The version of prism that we should use to parse. */
- pm_options_version_t version;
-
- /** The command line flags given from the options. */
- uint8_t command_line;
-
- /**
- * Whether or not we have found a frozen_string_literal magic comment with
- * a true or false value.
- * May be:
- * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
- * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
- * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
- */
- int8_t frozen_string_literal;
-
- /**
- * Whether or not we are parsing an eval string. This impacts whether or not
- * we should evaluate if block exits/yields are valid.
- */
- bool parsing_eval;
-
- /**
- * Whether or not we are parsing a "partial" script, which is a script that
- * will be evaluated in the context of another script, so we should not
- * check jumps (next/break/etc.) for validity.
- */
- bool partial_script;
-
- /** Whether or not we're at the beginning of a command. */
- bool command_start;
-
- /** Whether or not we're currently recovering from a syntax error. */
- bool recovering;
-
- /**
- * This is very specialized behavior for when you want to parse in a context
- * that does not respect encoding comments. Its main use case is translating
- * into the whitequark/parser AST which re-encodes source files in UTF-8
- * before they are parsed and ignores encoding comments.
- */
- bool encoding_locked;
-
- /**
- * Whether or not the encoding has been changed by a magic comment. We use
- * this to provide a fast path for the lexer instead of going through the
- * function pointer.
- */
- bool encoding_changed;
-
- /**
- * This flag indicates that we are currently parsing a pattern matching
- * expression and impacts that calculation of newlines.
- */
- bool pattern_matching_newlines;
-
- /** This flag indicates that we are currently parsing a keyword argument. */
- bool in_keyword_arg;
-
- /**
- * Whether or not the parser has seen a token that has semantic meaning
- * (i.e., a token that is not a comment or whitespace).
- */
- bool semantic_token_seen;
-
- /**
- * True if the current regular expression being lexed contains only ASCII
- * characters.
- */
- bool current_regular_expression_ascii_only;
-
- /**
- * By default, Ruby always warns about mismatched indentation. This can be
- * toggled with a magic comment.
- */
- bool warn_mismatched_indentation;
-};
+ * Initiate the parse with the given parser.
+ *
+ * @param parser The parser to use.
+ * @returns The AST representing the source.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1);
#endif
diff --git a/prism/prettyprint.h b/prism/prettyprint.h
index 5a52b2b6b8..0d8e416341 100644
--- a/prism/prettyprint.h
+++ b/prism/prettyprint.h
@@ -6,19 +6,16 @@
#ifndef PRISM_PRETTYPRINT_H
#define PRISM_PRETTYPRINT_H
-#include "prism/defines.h"
+#include "prism/excludes.h"
-#ifdef PRISM_EXCLUDE_PRETTYPRINT
+#ifndef PRISM_EXCLUDE_PRETTYPRINT
-void pm_prettyprint(void);
-
-#else
-
-#include <stdio.h>
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
#include "prism/ast.h"
+#include "prism/buffer.h"
#include "prism/parser.h"
-#include "prism/util/pm_buffer.h"
/**
* Pretty-prints the AST represented by the given node to the given buffer.
@@ -27,7 +24,7 @@ void pm_prettyprint(void);
* @param parser The parser that parsed the AST.
* @param node The root node of the AST to pretty-print.
*/
-PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node);
+PRISM_EXPORTED_FUNCTION void pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) PRISM_NONNULL(1, 2, 3);
#endif
diff --git a/prism/prism.c b/prism/prism.c
index cc634b59e3..a8bbcea097 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,4 +1,90 @@
-#include "prism.h"
+#include "prism/compiler/accel.h"
+#include "prism/compiler/fallthrough.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+#include "prism/internal/bit.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/char.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/isinf.h"
+#include "prism/internal/line_offset_list.h"
+#include "prism/internal/list.h"
+#include "prism/internal/magic_comments.h"
+#include "prism/internal/memchr.h"
+#include "prism/internal/node.h"
+#include "prism/internal/options.h"
+#include "prism/internal/parser.h"
+#include "prism/internal/regexp.h"
+#include "prism/internal/serialize.h"
+#include "prism/internal/source.h"
+#include "prism/internal/static_literals.h"
+#include "prism/internal/stringy.h"
+#include "prism/internal/strncasecmp.h"
+#include "prism/internal/strpbrk.h"
+#include "prism/internal/tokens.h"
+
+#include "prism/excludes.h"
+#include "prism/serialize.h"
+#include "prism/stream.h"
+#include "prism/version.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * When we are parsing using recursive descent, we want to protect against
+ * malicious payloads that could attempt to crash our parser. We do this by
+ * specifying a maximum depth to which we are allowed to recurse.
+ */
+#ifndef PRISM_DEPTH_MAXIMUM
+ #define PRISM_DEPTH_MAXIMUM 10000
+#endif
+
+/**
+ * A simple utility macro to concatenate two tokens together, necessary when one
+ * of the tokens is itself a macro.
+ */
+#define PM_CONCATENATE(left, right) left ## right
+
+/**
+ * Static assertions were only standardized in C11. _Static_assert is a keyword
+ * and not a macro, so we detect it through __STDC_VERSION__; on older compilers
+ * we fall back to a typedef whose array size is negative when condition fails.
+ */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
+#else
+# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
+#endif
+
+/**
+ * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its
+ * branch prediction.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+ /** The compiler should predict that this branch will be taken. */
+ #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)
+
+ /** The compiler should predict that this branch will not be taken. */
+ #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+ /** No-op because this platform does not support branch prediction hints. */
+ #define PRISM_LIKELY(x) (x)
+
+ /** No-op because this platform does not support branch prediction hints. */
+ #define PRISM_UNLIKELY(x) (x)
+#endif
/**
* The prism version and the serialization format.
@@ -19,6 +105,51 @@ pm_version(void) {
#define MAX(a,b) (((a)>(b))?(a):(b))
/******************************************************************************/
+/* Helpful AST-related macros */
+/******************************************************************************/
+
+#define U32(value_) ((uint32_t) (value_))
+
+#define FL PM_NODE_FLAGS
+#define UP PM_NODE_UPCAST
+
+#define PM_LOCATION_START(location_) ((location_)->start)
+#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)
+
+#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
+#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
+#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
+#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)
+
+#define PM_NODE_START(node_) (UP(node_)->location.start)
+#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
+#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
+#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))
+
+#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
+#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
+
+#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
+#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
+#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
+#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
+#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))
+
+#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
+#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
+#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
+#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location
+
+#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
+#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
+#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
+#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))
+
+#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
+#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
+#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
+
+/******************************************************************************/
/* Lex mode manipulations */
/******************************************************************************/
@@ -26,7 +157,7 @@ pm_version(void) {
* Returns the incrementor character that should be used to increment the
* nesting count if one is possible.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
lex_mode_incrementor(const uint8_t start) {
switch (start) {
case '(':
@@ -43,7 +174,7 @@ lex_mode_incrementor(const uint8_t start) {
* Returns the matching character that should be used to terminate a list
* beginning with the given character.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
lex_mode_terminator(const uint8_t start) {
switch (start) {
case '(':
@@ -85,7 +216,7 @@ lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
/**
* Push on a new list lex mode.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
uint8_t incrementor = lex_mode_incrementor(delimiter);
uint8_t terminator = lex_mode_terminator(delimiter);
@@ -103,7 +234,8 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
// These are the places where we need to split up the content of the list.
// We'll use strpbrk to find the first of these characters.
uint8_t *breakpoints = lex_mode.as.list.breakpoints;
- memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
+ memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+ memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
size_t index = 7;
// Now we'll add the terminator to the list of breakpoints. If the
@@ -132,7 +264,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
* called when we're at the end of the file. We want the parser to be able to
* perform its normal error tolerance.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_list_eof(pm_parser_t *parser) {
return lex_mode_push_list(parser, false, '\0');
}
@@ -140,7 +272,7 @@ lex_mode_push_list_eof(pm_parser_t *parser) {
/**
* Push on a new regexp lex mode.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
pm_lex_mode_t lex_mode = {
.mode = PM_LEX_REGEXP,
@@ -155,7 +287,8 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
// regular expression. We'll use strpbrk to find the first of these
// characters.
uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
- memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+ memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+ memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
size_t index = 4;
// First we'll add the terminator.
@@ -175,7 +308,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
/**
* Push on a new string lex mode.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
pm_lex_mode_t lex_mode = {
.mode = PM_LEX_STRING,
@@ -191,7 +324,8 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
// These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters.
uint8_t *breakpoints = lex_mode.as.string.breakpoints;
- memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
+ memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
+ memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
size_t index = 3;
// Now add in the terminator. If the terminator is not already a NULL byte,
@@ -221,7 +355,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed
* called when we're at the end of the file. We want the parser to be able to
* perform its normal error tolerance.
*/
-static inline bool
+static PRISM_INLINE bool
lex_mode_push_string_eof(pm_parser_t *parser) {
return lex_mode_push_string(parser, false, false, '\0', '\0');
}
@@ -241,7 +375,7 @@ lex_mode_pop(pm_parser_t *parser) {
} else {
parser->lex_modes.index--;
pm_lex_mode_t *prev = parser->lex_modes.current->prev;
- xfree(parser->lex_modes.current);
+ xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
parser->lex_modes.current = prev;
}
}
@@ -249,7 +383,7 @@ lex_mode_pop(pm_parser_t *parser) {
/**
* This is the equivalent of IS_lex_state is CRuby.
*/
-static inline bool
+static PRISM_INLINE bool
lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
return parser->lex_state & state;
}
@@ -260,7 +394,7 @@ typedef enum {
PM_IGNORED_NEWLINE_PATTERN
} pm_ignored_newline_type_t;
-static inline pm_ignored_newline_type_t
+static PRISM_INLINE pm_ignored_newline_type_t
lex_state_ignored_p(pm_parser_t *parser) {
bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
@@ -273,17 +407,17 @@ lex_state_ignored_p(pm_parser_t *parser) {
}
}
-static inline bool
+static PRISM_INLINE bool
lex_state_beg_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
}
-static inline bool
+static PRISM_INLINE bool
lex_state_arg_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
}
-static inline bool
+static PRISM_INLINE bool
lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
if (parser->current.end >= parser->end) {
return false;
@@ -291,7 +425,7 @@ lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
}
-static inline bool
+static PRISM_INLINE bool
lex_state_end_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_END_ANY);
}
@@ -299,7 +433,7 @@ lex_state_end_p(pm_parser_t *parser) {
/**
* This is the equivalent of IS_AFTER_OPERATOR in CRuby.
*/
-static inline bool
+static PRISM_INLINE bool
lex_state_operator_p(pm_parser_t *parser) {
return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
}
@@ -308,7 +442,7 @@ lex_state_operator_p(pm_parser_t *parser) {
* Set the state of the lexer. This is defined as a function to be able to put a
* breakpoint in it.
*/
-static inline void
+static PRISM_INLINE void
lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
parser->lex_state = state;
}
@@ -322,7 +456,7 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
#endif
#if PM_DEBUG_LOGGING
-PRISM_ATTRIBUTE_UNUSED static void
+PRISM_UNUSED static void
debug_state(pm_parser_t *parser) {
fprintf(stderr, "STATE: ");
bool first = true;
@@ -403,140 +537,134 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
/**
* Append an error to the list of errors on the parser.
*/
-static inline void
-pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
+static PRISM_INLINE void
+pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+ pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
}
/**
- * Append an error to the list of errors on the parser using a format string.
+ * Append an error to the list of errors on the parser using the location of the
+ * given token.
*/
-#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
- pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
+static PRISM_INLINE void
+pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+ pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
+}
/**
* Append an error to the list of errors on the parser using the location of the
* current token.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
+ pm_parser_err_token(parser, &parser->current, diag_id);
}
/**
- * Append an error to the list of errors on the parser using the given location
- * using a format string.
+ * Append an error to the list of errors on the parser using the location of the
+ * previous token.
*/
-#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
- PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
+static PRISM_INLINE void
+pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+ pm_parser_err_token(parser, &parser->previous, diag_id);
+}
/**
* Append an error to the list of errors on the parser using the location of the
* given node.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, node->location.start, node->location.end, diag_id);
+ pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
}
/**
- * Append an error to the list of errors on the parser using the location of the
- * given node and a format string.
- */
-#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
- PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
-
-/**
- * Append an error to the list of errors on the parser using the location of the
- * given node and a format string, and add on the content of the node.
+ * Append an error to the list of errors on the parser using a format string.
*/
-#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
- PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
+#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
+ pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
- * previous token.
+ * given node and a format string.
*/
-static inline void
-pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
-}
+#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
+ PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
- * given token.
+ * given node and a format string, and add on the content of the node.
*/
-static inline void
-pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
- pm_parser_err(parser, token->start, token->end, diag_id);
-}
+#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
+ PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))
/**
* Append an error to the list of errors on the parser using the location of the
* given token and a format string.
*/
-#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
- PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
+ PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)
/**
* Append an error to the list of errors on the parser using the location of the
* given token and a format string, and add on the content of the token.
*/
-#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
- PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
+ PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
/**
* Append a warning to the list of warnings on the parser.
*/
-static inline void
-pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
+static PRISM_INLINE void
+pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+ pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
}
/**
* Append a warning to the list of warnings on the parser using the location of
* the given token.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
- pm_parser_warn(parser, token->start, token->end, diag_id);
+ pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
}
/**
* Append a warning to the list of warnings on the parser using the location of
* the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
- pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
+ pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
}
/**
- * Append a warning to the list of warnings on the parser using a format string.
+ * Append a warning to the list of warnings on the parser using a format string
+ * and the given location.
*/
-#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
- pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
+ pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)
/**
* Append a warning to the list of warnings on the parser using the location of
* the given token and a format string.
*/
-#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
- PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
+ PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)
/**
* Append a warning to the list of warnings on the parser using the location of
* the given token and a format string, and add on the content of the token.
*/
-#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
- PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
+#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
+ PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
/**
* Append a warning to the list of warnings on the parser using the location of
* the given node and a format string.
*/
-#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
- PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
+#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
+ PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
/**
* Add an error for an expected heredoc terminator. This is a special function
@@ -547,8 +675,8 @@ static void
pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
PM_PARSER_ERR_FORMAT(
parser,
- ident_start,
- ident_start + ident_length,
+ U32(ident_start - parser->start),
+ U32(ident_length),
PM_ERR_HEREDOC_TERM,
(int) ident_length,
(const char *) ident_start
@@ -708,7 +836,7 @@ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t
/**
* Get the current state of constant shareability.
*/
-static inline pm_shareable_constant_value_t
+static PRISM_INLINE pm_shareable_constant_value_t
pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
return parser->current_scope->shareable_constant;
}
@@ -733,12 +861,12 @@ pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constan
/**
* The point at which the set of locals switches from being a list to a hash.
*/
-#define PM_LOCALS_HASH_THRESHOLD 9
+#define PM_LOCALS_HASH_THRESHOLD 5
static void
pm_locals_free(pm_locals_t *locals) {
if (locals->capacity > 0) {
- xfree(locals->locals);
+ xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
}
}
@@ -810,11 +938,13 @@ pm_locals_resize(pm_locals_t *locals) {
* @return True if the local was added, and false if the local already exists.
*/
static bool
-pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
if (locals->size >= (locals->capacity / 4 * 3)) {
pm_locals_resize(locals);
}
+ locals->bloom |= (1u << (name & 31));
+
if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
for (uint32_t index = 0; index < locals->capacity; index++) {
pm_local_t *local = &locals->locals[index];
@@ -822,7 +952,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
if (local->name == PM_CONSTANT_ID_UNSET) {
*local = (pm_local_t) {
.name = name,
- .location = { .start = start, .end = end },
+ .location = { .start = start, .length = length },
.index = locals->size++,
.reads = reads,
.hash = 0
@@ -843,7 +973,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
if (local->name == PM_CONSTANT_ID_UNSET) {
*local = (pm_local_t) {
.name = name,
- .location = { .start = start, .end = end },
+ .location = { .start = start, .length = length },
.index = locals->size++,
.reads = reads,
.hash = initial_hash
@@ -867,6 +997,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start
*/
static uint32_t
pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+ if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
+
if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
for (uint32_t index = 0; index < locals->size; index++) {
pm_local_t *local = &locals->locals[index];
@@ -943,8 +1075,8 @@ pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
* written but not read in certain contexts.
*/
static void
-pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
- pm_constant_id_list_init_capacity(list, locals->size);
+pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
+ pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
// If we're still below the threshold for switching to a hash, then we only
// need to loop over the locals until we hit the size because the locals are
@@ -961,14 +1093,14 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
if (local->name != PM_CONSTANT_ID_UNSET) {
pm_constant_id_list_insert(list, (size_t) local->index, local->name);
- if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
+ if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
if (constant->length >= 1 && *constant->start != '_') {
PM_PARSER_WARN_FORMAT(
parser,
local->location.start,
- local->location.end,
+ local->location.length,
PM_WARN_UNUSED_LOCAL_VARIABLE,
(int) constant->length,
(const char *) constant->start
@@ -986,43 +1118,53 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals,
/**
* Retrieve the constant pool id for the given location.
*/
-static inline pm_constant_id_t
-pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
+static PRISM_INLINE pm_constant_id_t
+pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+ /* Fast path: if this is the same token as the last lookup (same pointer
+ * range), return the cached result. */
+ if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
+ return parser->constant_cache.id;
+ }
+
+ pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
+
+ parser->constant_cache.start = start;
+ parser->constant_cache.end = end;
+ parser->constant_cache.id = id;
+
+ return id;
}
/**
* Retrieve the constant pool id for the given string.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
- return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
+ return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
}
/**
* Retrieve the constant pool id for the given static literal C string.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
- return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
+ return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
}
/**
* Retrieve the constant pool id for the given token.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
- return pm_parser_constant_id_location(parser, token->start, token->end);
+ return pm_parser_constant_id_raw(parser, token->start, token->end);
}
/**
- * Retrieve the constant pool id for the given token. If the token is not
- * provided, then return 0.
+ * This macro allows you to define a case statement for all of the nodes that
+ * may result in a void value.
*/
-static inline pm_constant_id_t
-pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
- return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
-}
+#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \
+ case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE
/**
* Check whether or not the given node is value expression.
@@ -1035,12 +1177,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
while (node != NULL) {
switch (PM_NODE_TYPE(node)) {
- case PM_RETURN_NODE:
- case PM_BREAK_NODE:
- case PM_NEXT_NODE:
- case PM_REDO_NODE:
- case PM_RETRY_NODE:
- case PM_MATCH_REQUIRED_NODE:
+ case PM_CASE_VOID_VALUE:
return void_node != NULL ? void_node : node;
case PM_MATCH_PREDICATE_NODE:
return NULL;
@@ -1049,57 +1186,128 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
if (cast->ensure_clause != NULL) {
if (cast->rescue_clause != NULL) {
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
if (vn != NULL) return vn;
}
if (cast->statements != NULL) {
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn != NULL) return vn;
}
- node = (pm_node_t *) cast->ensure_clause;
+ node = UP(cast->ensure_clause);
} else if (cast->rescue_clause != NULL) {
- if (cast->statements == NULL) return NULL;
+ // https://bugs.ruby-lang.org/issues/21669
+ if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ if (cast->statements == NULL) return NULL;
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
- if (vn == NULL) return NULL;
- if (void_node == NULL) void_node = vn;
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+ }
for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
+
if (vn == NULL) {
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return NULL;
+ }
void_node = NULL;
break;
}
- if (void_node == NULL) {
- void_node = vn;
- }
}
if (cast->else_clause != NULL) {
- node = (pm_node_t *) cast->else_clause;
+ node = UP(cast->else_clause);
+
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+ pm_node_t *vn = pm_check_value_expression(parser, node);
+ if (vn != NULL) return vn;
+ }
} else {
return void_node;
}
} else {
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
}
break;
}
+ case PM_CASE_NODE: {
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return NULL;
+ }
+
+ pm_case_node_t *cast = (pm_case_node_t *) node;
+ if (cast->else_clause == NULL) return NULL;
+
+ pm_node_t *condition;
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
+ assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
+
+ pm_when_node_t *cast = (pm_when_node_t *) condition;
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+ }
+
+ node = UP(cast->else_clause);
+ break;
+ }
+ case PM_CASE_MATCH_NODE: {
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return NULL;
+ }
+
+ pm_case_match_node_t *cast = (pm_case_match_node_t *) node;
+ if (cast->else_clause == NULL) return NULL;
+
+ pm_node_t *condition;
+ PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
+ assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
+
+ pm_in_node_t *cast = (pm_in_node_t *) condition;
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
+ if (vn == NULL) return NULL;
+ if (void_node == NULL) void_node = vn;
+ }
+
+ node = UP(cast->else_clause);
+ break;
+ }
case PM_ENSURE_NODE: {
pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
break;
}
case PM_PARENTHESES_NODE: {
pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node;
- node = (pm_node_t *) cast->body;
+ node = UP(cast->body);
break;
}
case PM_STATEMENTS_NODE: {
pm_statements_node_t *cast = (pm_statements_node_t *) node;
+
+ // https://bugs.ruby-lang.org/issues/21669
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
+ pm_node_t *body_part;
+ PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
+ switch (PM_NODE_TYPE(body_part)) {
+ case PM_CASE_VOID_VALUE:
+ if (void_node == NULL) {
+ void_node = body_part;
+ }
+ return void_node;
+ default: break;
+ }
+ }
+ }
+
node = cast->body.nodes[cast->body.size - 1];
break;
}
@@ -1108,7 +1316,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
if (cast->statements == NULL || cast->subsequent == NULL) {
return NULL;
}
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn == NULL) {
return NULL;
}
@@ -1123,19 +1331,19 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
if (cast->statements == NULL || cast->else_clause == NULL) {
return NULL;
}
- pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
+ pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
if (vn == NULL) {
return NULL;
}
if (void_node == NULL) {
void_node = vn;
}
- node = (pm_node_t *) cast->else_clause;
+ node = UP(cast->else_clause);
break;
}
case PM_ELSE_NODE: {
pm_else_node_t *cast = (pm_else_node_t *) node;
- node = (pm_node_t *) cast->statements;
+ node = UP(cast->statements);
break;
}
case PM_AND_NODE: {
@@ -1165,7 +1373,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
return NULL;
}
-static inline void
+static PRISM_INLINE void
pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
pm_node_t *void_node = pm_check_value_expression(parser, node);
if (void_node != NULL) {
@@ -1193,7 +1401,7 @@ pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
break;
case PM_CALL_NODE: {
const pm_call_node_t *cast = (const pm_call_node_t *) node;
- if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
+ if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
switch (message->length) {
@@ -1406,10 +1614,10 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
* Add a warning to the parser if the value that is being written inside of a
* predicate to a conditional is a literal.
*/
-static inline void
+static PRISM_INLINE void
pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
if (pm_conditional_predicate_warn_write_literal_p(node)) {
- pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
+ pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
}
}
@@ -1547,26 +1755,6 @@ pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_pr
}
/**
- * In a lot of places in the tree you can have tokens that are not provided but
- * that do not cause an error. For example, this happens in a method call
- * without parentheses. In these cases we set the token to the "not provided" type.
- * For example:
- *
- * pm_token_t token = not_provided(parser);
- */
-static inline pm_token_t
-not_provided(pm_parser_t *parser) {
- return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
-}
-
-#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
-#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
-#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
-#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
-#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
-#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
-
-/**
* This is a special out parameter to the parse_arguments_list function that
* includes opening and closing parentheses in addition to the arguments since
* it's so common. It is handy to use when passing argument information to one
@@ -1592,22 +1780,29 @@ typedef struct {
/**
* Retrieve the end location of a `pm_arguments_t` object.
*/
-static inline const uint8_t *
+static PRISM_INLINE const pm_location_t *
pm_arguments_end(pm_arguments_t *arguments) {
if (arguments->block != NULL) {
- const uint8_t *end = arguments->block->location.end;
- if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
- end = arguments->closing_loc.end;
+ uint32_t end = PM_NODE_END(arguments->block);
+
+ if (arguments->closing_loc.length > 0) {
+ uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
+ if (arguments_end > end) {
+ return &arguments->closing_loc;
+ }
}
- return end;
+ return &arguments->block->location;
}
- if (arguments->closing_loc.start != NULL) {
- return arguments->closing_loc.end;
+ if (arguments->closing_loc.length > 0) {
+ return &arguments->closing_loc;
}
if (arguments->arguments != NULL) {
- return arguments->arguments->base.location.end;
+ return &arguments->arguments->base.location;
+ }
+ if (arguments->opening_loc.length > 0) {
+ return &arguments->opening_loc;
}
- return arguments->closing_loc.end;
+ return NULL;
}
/**
@@ -1618,7 +1813,7 @@ static void
pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
// First, check that we have arguments and that we don't have a closing
// location for them.
- if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
+ if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
return;
}
@@ -1635,7 +1830,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
// If we didn't hit a case before this check, then at this point we need to
// add a syntax error.
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
}
/******************************************************************************/
@@ -1648,7 +1843,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
* reason we have the encoding_changed boolean to check if we need to go through
* the function pointer or can just directly use the UTF-8 functions.
*/
-static inline size_t
+static PRISM_INLINE size_t
char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
if (n <= 0) return 0;
@@ -1675,7 +1870,7 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t
* Similar to char_is_identifier but this function assumes that the encoding
* has not been changed.
*/
-static inline size_t
+static PRISM_INLINE size_t
char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
if (n <= 0) {
return 0;
@@ -1687,11 +1882,189 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
}
/**
+ * Scan forward through ASCII identifier characters (a-z, A-Z, 0-9, _) using
+ * wide operations. Returns the number of leading ASCII identifier bytes.
+ * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8)
+ * with a byte-at-a-time loop.
+ *
+ * Up to three optimized implementations are selected at compile time, with a
+ * no-op fallback for unsupported platforms:
+ * 1. NEON — processes 16 bytes per iteration on aarch64.
+ * 2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ * 3. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ */
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { // NEON variant: counts the leading [a-zA-Z0-9_] bytes of [start, end), testing 16 bytes per iteration.
+ const uint8_t *cursor = start; // only ever advanced past bytes already classified as identifier characters
+
+ // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
+ // Each high nibble is assigned a unique bit; the low nibble table
+ // contains the OR of bits for all high nibbles that have an
+ // identifier character at that low nibble position. A byte is an
+ // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
+ static const uint8_t low_lut_data[16] = { // indexed by the low nibble of the byte
+ 0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
+ 0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
+ };
+ static const uint8_t high_lut_data[16] = { // indexed by the high nibble: 0x3 -> 0x01, 0x4 -> 0x02, 0x5 -> 0x04, 0x6 -> 0x08, 0x7 -> 0x10
+ 0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // high nibbles 0x8-0xF map to 0, so bytes >= 0x80 never match
+ };
+ const uint8x16_t low_lut = vld1q_u8(low_lut_data);
+ const uint8x16_t high_lut = vld1q_u8(high_lut_data);
+ const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+
+ while (cursor + 16 <= end) { // only full 16-byte chunks are examined; a shorter tail is left for the caller
+ uint8x16_t v = vld1q_u8(cursor);
+
+ uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+ uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+ uint8x16_t ident = vandq_u8(lo_class, hi_class); // per lane: nonzero iff that byte is an identifier character
+
+ // Fast check: if the per-byte minimum is nonzero, every byte matched.
+ if (vminvq_u8(ident) != 0) {
+ cursor += 16;
+ continue;
+ }
+
+ // Find the first non-identifier byte (zero in ident).
+ uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0)); // lanes are all-ones where ident is 0, all-zero elsewhere
+ uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0); // the first 8 lanes viewed as one 64-bit value
+
+ if (lo != 0) {
+ cursor += pm_ctzll(lo) / 8; // presumably pm_ctzll counts trailing zero bits; / 8 turns that bit index into a byte index
+ } else {
+ uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1); // the last 8 lanes; nonzero here, since some lane failed and none of the first 8 did
+ cursor += 8 + pm_ctzll(hi) / 8;
+ }
+
+ return (size_t) (cursor - start); // stopped at the first non-identifier byte
+ }
+
+ return (size_t) (cursor - start); // fewer than 16 bytes remain
+}
+
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
+
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { // SSSE3 variant: counts the leading [a-zA-Z0-9_] bytes of [start, end), testing 16 bytes per iteration.
+ const uint8_t *cursor = start; // only ever advanced past bytes already classified as identifier characters
+
+ while (cursor + 16 <= end) { // only full 16-byte chunks are examined; a shorter tail is left for the caller
+ __m128i v = _mm_loadu_si128((const __m128i *) cursor); // unaligned load, so cursor needs no particular alignment
+ __m128i zero = _mm_setzero_si128();
+
+ // Unsigned range check via saturating subtraction:
+ // byte >= lo ⟺ saturate(lo - byte) == 0
+ // byte <= hi ⟺ saturate(byte - hi) == 0
+
+ // Fold case: OR with 0x20 maps A-Z to a-z.
+ __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
+ __m128i letter = _mm_and_si128( // 0xFF per lane where 0x61 <= lowered <= 0x7A
+ _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
+ _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));
+
+ __m128i digit = _mm_and_si128( // 0xFF per lane where 0x30 <= byte <= 0x39
+ _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
+ _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));
+
+ __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F)); // 0xFF per lane where byte == 0x5F ('_')
+
+ __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
+ int mask = _mm_movemask_epi8(ident); // one bit per lane: bit i is set iff byte i is an identifier character
+
+ if (mask == 0xFFFF) { // all 16 bytes matched
+ cursor += 16;
+ continue;
+ }
+
+ cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF)); // inverted mask marks non-identifier bytes; presumably pm_ctzll returns the index of its lowest set bit
+ return (size_t) (cursor - start); // stopped at the first non-identifier byte
+ }
+
+ return (size_t) (cursor - start); // fewer than 16 bytes remain
+}
+
+// The SWAR path uses pm_ctzll to find the first non-matching byte within a
+// word, which only yields the correct byte index on little-endian targets.
+// We gate on a positive little-endian check so that unknown-endianness
+// platforms safely fall through to the no-op fallback.
+#elif defined(PRISM_HAS_SWAR)
+
+/**
+ * Portable SWAR fallback — processes 8 bytes per iteration.
+ *
+ * Every per-byte test below is arranged so that no carry or borrow can cross
+ * a byte boundary, which is what makes each byte's result bit exact:
+ *
+ * - The range checks pre-set the high bit of each byte before subtraction:
+ *   (byte | 0x80) - lo has a minimum value of 0x80 - 0x7F = 1, so underflow
+ *   (and thus a borrow into the next byte) is impossible. The result has bit
+ *   7 set if and only if byte >= lo. The same reasoning applies to the
+ *   upper-bound direction.
+ * - The underscore check adds 0x7F to (byte ^ 0x5F). For ASCII input both
+ *   operands are at most 0x7F, so the sum is at most 0xFE and cannot carry.
+ *   Bit 7 of the sum is clear if and only if byte == 0x5F.
+ *
+ * Returns the number of leading ASCII identifier bytes ([a-zA-Z0-9_]) found
+ * while at least 8 bytes remain. The scan stops early at the first
+ * non-identifier byte, or at the first 8-byte word containing a non-ASCII
+ * byte; whatever is left is the caller's to handle one byte at a time.
+ */
+static PRISM_INLINE size_t
+scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
+    static const uint64_t ones = 0x0101010101010101ULL;
+    static const uint64_t highs = 0x8080808080808080ULL;
+    const uint8_t *cursor = start;
+
+    while (cursor + 8 <= end) {
+        // memcpy rather than a pointer cast, so cursor needs no alignment.
+        uint64_t word;
+        memcpy(&word, cursor, 8);
+
+        // Bail on any non-ASCII byte. Everything below relies on every byte
+        // of the word being < 0x80.
+        if (word & highs) break;
+
+        uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;
+
+        // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
+        // then check the lowercase range once. The only bytes that land in
+        // [0x61,0x7A] after the OR are those that were already there and
+        // those that were in [0x41,0x5A], i.e. exactly the ASCII letters.
+        uint64_t lowered = word | (ones * 0x20);
+        uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;
+
+        // Exact per-byte equality with underscore: a byte of xor_us is zero
+        // if and only if the input byte is 0x5F, and adding 0x7F sets bit 7
+        // for every nonzero byte without carrying into its neighbor.
+        //
+        // The classic "has zero byte" idiom ((x - ones) & ~x & highs) must
+        // not be used as a per-byte mask: its subtraction borrows out of a
+        // zero byte, so a byte equal to 0x01 directly above it is reported
+        // as zero too. Here that would classify '^' (0x5E ^ 0x5F == 0x01)
+        // as an identifier character whenever it immediately follows '_'.
+        uint64_t xor_us = word ^ (ones * 0x5F);
+        uint64_t underscore = ~(xor_us + ones * 0x7F) & highs;
+
+        uint64_t ident = digit | letter | underscore;
+
+        if (ident == highs) {
+            cursor += 8;
+            continue;
+        }
+
+        // Find the first non-identifier byte. On little-endian the first
+        // byte sits in the least-significant position.
+        uint64_t not_ident = ~ident & highs;
+        cursor += pm_ctzll(not_ident) / 8;
+        return (size_t) (cursor - start);
+    }
+
+    return (size_t) (cursor - start);
+}
+
+#else
+
+// No-op fallback for big-endian or other unsupported platforms.
+// The caller's byte-at-a-time loop handles everything.
+#define scan_identifier_ascii(start, end) ((size_t) 0)
+
+#endif
+
+/**
* Like the above, this function is also used extremely frequently to lex all of
* the identifiers in a source file once the first character has been found. So
* it's important that it be as fast as possible.
*/
-static inline size_t
+static PRISM_INLINE size_t
char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
if (n <= 0) {
return 0;
@@ -1729,7 +2102,7 @@ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
#undef BIT
#undef PUNCT
-static inline bool
+static PRISM_INLINE bool
char_is_global_name_punctuation(const uint8_t b) {
const unsigned int i = (const unsigned int) b;
if (i <= 0x20 || 0x7e < i) return false;
@@ -1737,7 +2110,7 @@ char_is_global_name_punctuation(const uint8_t b) {
return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
}
-static inline bool
+static PRISM_INLINE bool
token_is_setter_name(pm_token_t *token) {
return (
(token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
@@ -1825,7 +2198,7 @@ pm_local_is_keyword(const char *source, size_t length) {
/**
* Set the given flag on the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
node->flags |= flag;
}
@@ -1833,7 +2206,7 @@ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
/**
* Remove the given flag from the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
node->flags &= (pm_node_flags_t) ~flag;
}
@@ -1841,7 +2214,7 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
/**
* Set the repeated parameter flag on the given node.
*/
-static inline void
+static PRISM_INLINE void
pm_node_flag_set_repeated_parameter(pm_node_t *node) {
assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
@@ -1869,7 +2242,7 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) {
/**
* Parse out the options for a regular expression.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
pm_node_flags_t flags = 0;
@@ -1895,9 +2268,9 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin
size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
if (unknown_flags_length != 0) {
const char *word = unknown_flags_length >= 2 ? "options" : "option";
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
}
- pm_buffer_free(&unknown_flags);
+ pm_buffer_cleanup(&unknown_flags);
}
return flags;
@@ -1915,36 +2288,45 @@ static size_t
pm_statements_node_body_length(pm_statements_node_t *node);
/**
- * This function is here to allow us a place to extend in the future when we
- * implement our own arena allocation.
+ * Move an integer's values array into the arena. If the integer has heap-
+ * allocated values, copy them to the arena and free the original.
*/
-static inline void *
-pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
- void *memory = xcalloc(1, size);
- if (memory == NULL) {
- fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
- abort();
+static PRISM_INLINE void
+pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
+ if (integer->values != NULL) {
+ size_t byte_size = integer->length * sizeof(uint32_t);
+ uint32_t *old_values = integer->values;
+ integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
+ xfree(old_values);
}
- return memory;
}
-#define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type))
-#define PM_NODE_IDENTIFY(parser) (++parser->node_id)
-
/**
- * Allocate a new MissingNode node.
+ * Allocate a new ErrorRecoveryNode node with no unexpected child.
*/
-static pm_missing_node_t *
-pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
-
- *node = (pm_missing_node_t) {{
- .type = PM_MISSING_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = start, .end = end }
- }};
+static pm_error_recovery_node_t *
+pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    // The node's location is given directly by the caller's start/length pair.
+    const pm_location_t location = { .start = start, .length = length };
+
+    // No flags are set, and the final NULL leaves the node without a wrapped
+    // child.
+    return pm_error_recovery_node_new(parser->arena, ++parser->node_id, 0, location, NULL);
+}
- return node;
+/**
+ * Allocate a new ErrorRecoveryNode node wrapping an unexpected child node.
+ */
+static pm_error_recovery_node_t *
+pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) {
+    // The recovery node takes over the location of the node it wraps.
+    const pm_location_t location = unexpected->location;
+
+    // No flags are set; the wrapped node is passed through as the child.
+    return pm_error_recovery_node_new(parser->arena, ++parser->node_id, 0, location, unexpected);
}
/**
@@ -1953,23 +2335,16 @@ pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t
static pm_alias_global_variable_node_t *
pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
- pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
-
- *node = (pm_alias_global_variable_node_t) {
- {
- .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = old_name->location.end
- },
- },
- .new_name = new_name,
- .old_name = old_name,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
- return node;
+ return pm_alias_global_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
+ new_name,
+ old_name,
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -1978,23 +2353,16 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw
static pm_alias_method_node_t *
pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
- pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
- *node = (pm_alias_method_node_t) {
- {
- .type = PM_ALIAS_METHOD_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = old_name->location.end
- },
- },
- .new_name = new_name,
- .old_name = old_name,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
-
- return node;
+ return pm_alias_method_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
+ new_name,
+ old_name,
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -2002,23 +2370,15 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n
*/
static pm_alternation_pattern_node_t *
pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
- pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
-
- *node = (pm_alternation_pattern_node_t) {
- {
- .type = PM_ALTERNATION_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end
- },
- },
- .left = left,
- .right = right,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_alternation_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2028,23 +2388,15 @@ static pm_and_node_t *
pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
- pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
-
- *node = (pm_and_node_t) {
- {
- .type = PM_AND_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end
- },
- },
- .left = left,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .right = right
- };
-
- return node;
+ return pm_and_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2052,18 +2404,13 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera
*/
static pm_arguments_node_t *
pm_arguments_node_create(pm_parser_t *parser) {
- pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
-
- *node = (pm_arguments_node_t) {
- {
- .type = PM_ARGUMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .arguments = { 0 }
- };
-
- return node;
+ return pm_arguments_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
@@ -2078,19 +2425,22 @@ pm_arguments_node_size(pm_arguments_node_t *node) {
* Append an argument to an arguments node.
*/
static void
-pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
+pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
if (pm_arguments_node_size(node) == 0) {
- node->base.location.start = argument->location.start;
+ PM_NODE_START_SET_NODE(node, argument);
}
- node->base.location.end = argument->location.end;
- pm_node_list_append(&node->arguments, argument);
+ if (PM_NODE_END(node) < PM_NODE_END(argument)) {
+ PM_NODE_LENGTH_SET_NODE(node, argument);
+ }
+
+ pm_node_list_append(arena, &node->arguments, argument);
if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
- pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
+ pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
} else {
- pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
+ pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
}
}
}
@@ -2100,43 +2450,49 @@ pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argumen
*/
static pm_array_node_t *
pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
- pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
-
- *node = (pm_array_node_t) {
- {
- .type = PM_ARRAY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(opening)
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .elements = { 0 }
- };
-
- return node;
+ if (opening == NULL) {
+ return pm_array_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
+ } else {
+ return pm_array_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, opening)
+ );
+ }
}
/**
* Append an argument to an array node.
*/
-static inline void
-pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
- if (!node->elements.size && !node->opening_loc.start) {
- node->base.location.start = element->location.start;
+static PRISM_INLINE void
+pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
+ if (!node->elements.size && !node->opening_loc.length) {
+ PM_NODE_START_SET_NODE(node, element);
}
- pm_node_list_append(&node->elements, element);
- node->base.location.end = element->location.end;
+ pm_node_list_append(arena, &node->elements, element);
+ PM_NODE_LENGTH_SET_NODE(node, element);
// If the element is not a static literal, then the array is not a static
// literal. Turn that flag off.
if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
- pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
}
if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
- pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
+ pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
}
}
@@ -2144,10 +2500,10 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
* Set the closing token and end location of an array node.
*/
static void
-pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
- assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
- node->base.location.end = closing->end;
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
+ assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+ node->closing_loc = TOK2LOC(parser, closing);
}
/**
@@ -2156,24 +2512,18 @@ pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = nodes->nodes[0]->location.start,
- .end = nodes->nodes[nodes->size - 1]->location.end
- },
- },
- .constant = NULL,
- .rest = NULL,
- .requireds = { 0 },
- .posts = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
+ pm_array_pattern_node_t *node = pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
// For now we're going to just copy over each pointer manually. This could be
// much more efficient, as we could instead resize the node list.
@@ -2185,9 +2535,9 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
node->rest = child;
found_rest = true;
} else if (found_rest) {
- pm_node_list_append(&node->posts, child);
+ pm_node_list_append(parser->arena, &node->posts, child);
} else {
- pm_node_list_append(&node->requireds, child);
+ pm_node_list_append(parser->arena, &node->requireds, child);
}
}
@@ -2199,23 +2549,18 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = rest->location,
- },
- .constant = NULL,
- .rest = rest,
- .requireds = { 0 },
- .posts = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(rest),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ rest,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -2224,26 +2569,18 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = constant->location.start,
- .end = closing->end
- },
- },
- .constant = constant,
- .rest = NULL,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .requireds = { 0 },
- .posts = { 0 }
- };
-
- return node;
+ return pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
+ constant,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -2252,31 +2589,23 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant,
*/
static pm_array_pattern_node_t *
pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
-
- *node = (pm_array_pattern_node_t) {
- {
- .type = PM_ARRAY_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .constant = NULL,
- .rest = NULL,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .requireds = { 0 },
- .posts = { 0 }
- };
-
- return node;
+ return pm_array_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
-static inline void
-pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
- pm_node_list_append(&node->requireds, inner);
+static PRISM_INLINE void
+pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
+ pm_node_list_append(arena, &node->requireds, inner);
}
/**
@@ -2284,15 +2613,14 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t
*/
static pm_assoc_node_t *
pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
- pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
- const uint8_t *end;
+ uint32_t end;
- if (value != NULL && value->location.end > key->location.end) {
- end = value->location.end;
- } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
- end = operator->end;
+ if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
+ end = PM_NODE_END(value);
+ } else if (operator != NULL) {
+ end = PM_TOKEN_END(parser, operator);
} else {
- end = key->location.end;
+ end = PM_NODE_END(key);
}
// Hash string keys will be frozen, so we can mark them as frozen here so
@@ -2312,22 +2640,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
}
- *node = (pm_assoc_node_t) {
- {
- .type = PM_ASSOC_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = key->location.start,
- .end = end
- },
- },
- .key = key,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_assoc_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
+ key,
+ value,
+ NTOK2LOC(parser, operator)
+ );
}
/**
@@ -2336,22 +2657,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
static pm_assoc_splat_node_t *
pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
assert(operator->type == PM_TOKEN_USTAR_STAR);
- pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
-
- *node = (pm_assoc_splat_node_t) {
- {
- .type = PM_ASSOC_SPLAT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = value == NULL ? operator->end : value->location.end
- },
- },
- .value = value,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
- return node;
+ return pm_assoc_splat_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2360,18 +2674,14 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token
static pm_back_reference_read_node_t *
pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_BACK_REFERENCE);
- pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
- *node = (pm_back_reference_read_node_t) {
- {
- .type = PM_BACK_REFERENCE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_back_reference_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -2379,23 +2689,21 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name)
*/
static pm_begin_node_t *
pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
- pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
-
- *node = (pm_begin_node_t) {
- {
- .type = PM_BEGIN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = begin_keyword->start,
- .end = statements == NULL ? begin_keyword->end : statements->base.location.end
- },
- },
- .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
- .statements = statements,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
+ uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
+
+ return pm_begin_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, begin_keyword),
+ statements,
+ NULL,
+ NULL,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -2403,11 +2711,10 @@ pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_st
*/
static void
pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
- // If the begin keyword doesn't exist, we set the start on the begin_node
- if (!node->begin_keyword_loc.start) {
- node->base.location.start = rescue_clause->base.location.start;
+ if (node->begin_keyword_loc.length == 0) {
+ PM_NODE_START_SET_NODE(node, rescue_clause);
}
- node->base.location.end = rescue_clause->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
node->rescue_clause = rescue_clause;
}
@@ -2416,7 +2723,10 @@ pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_
*/
static void
pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
- node->base.location.end = else_clause->base.location.end;
+ if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
+ PM_NODE_START_SET_NODE(node, else_clause);
+ }
+ PM_NODE_LENGTH_SET_NODE(node, else_clause);
node->else_clause = else_clause;
}
@@ -2425,7 +2735,10 @@ pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause
*/
static void
pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
- node->base.location.end = ensure_clause->base.location.end;
+ if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
+ PM_NODE_START_SET_NODE(node, ensure_clause);
+ }
+ PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
node->ensure_clause = ensure_clause;
}
@@ -2433,11 +2746,10 @@ pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_
* Set the end keyword and end location of a begin node.
*/
static void
-pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
- assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
-
- node->base.location.end = end_keyword->end;
- node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
+pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
+ assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
}
/**
@@ -2445,22 +2757,16 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo
*/
static pm_block_argument_node_t *
pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
- pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
-
- *node = (pm_block_argument_node_t) {
- {
- .type = PM_BLOCK_ARGUMENT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = expression == NULL ? operator->end : expression->location.end
- },
- },
- .expression = expression,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ assert(operator->type == PM_TOKEN_UAMPERSAND);
+
+ return pm_block_argument_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
+ expression,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2468,22 +2774,17 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p
*/
static pm_block_node_t *
pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
- pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
-
- *node = (pm_block_node_t) {
- {
- .type = PM_BLOCK_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = opening->start, .end = closing->end },
- },
- .locals = *locals,
- .parameters = parameters,
- .body = body,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_block_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ *locals,
+ parameters,
+ body,
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -2491,24 +2792,17 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
*/
static pm_block_parameter_node_t *
pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
- assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
- pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
-
- *node = (pm_block_parameter_node_t) {
- {
- .type = PM_BLOCK_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- },
- },
- .name = pm_parser_optional_constant_id_token(parser, name),
- .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
+
+ return pm_block_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+ name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+ NTOK2LOC(parser, name),
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -2516,53 +2810,44 @@ pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, cons
*/
static pm_block_parameters_node_t *
pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
- pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
-
- const uint8_t *start;
- if (opening->type != PM_TOKEN_NOT_PROVIDED) {
- start = opening->start;
+ uint32_t start;
+ if (opening != NULL) {
+ start = PM_TOKEN_START(parser, opening);
} else if (parameters != NULL) {
- start = parameters->base.location.start;
+ start = PM_NODE_START(parameters);
} else {
- start = NULL;
+ start = 0;
}
- const uint8_t *end;
+ uint32_t end;
if (parameters != NULL) {
- end = parameters->base.location.end;
- } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
- end = opening->end;
+ end = PM_NODE_END(parameters);
+ } else if (opening != NULL) {
+ end = PM_TOKEN_END(parser, opening);
} else {
- end = NULL;
- }
-
- *node = (pm_block_parameters_node_t) {
- {
- .type = PM_BLOCK_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = start,
- .end = end
- }
- },
- .parameters = parameters,
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .locals = { 0 }
- };
-
- return node;
+ end = 0;
+ }
+
+ return pm_block_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ parameters,
+ ((pm_node_list_t) { 0 }),
+ NTOK2LOC(parser, opening),
+ ((pm_location_t) { 0 })
+ );
}
/**
* Set the closing location of a BlockParametersNode node.
*/
static void
-pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
- assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
-
- node->base.location.end = closing->end;
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
+pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
+ assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+ node->closing_loc = TOK2LOC(parser, closing);
}
/**
@@ -2570,29 +2855,27 @@ pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_
*/
static pm_block_local_variable_node_t *
pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
-
- *node = (pm_block_local_variable_node_t) {
- {
- .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_block_local_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
* Append a new block-local variable to a BlockParametersNode node.
*/
static void
-pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
- pm_node_list_append(&node->locals, (pm_node_t *) local);
+pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
+ pm_node_list_append(arena, &node->locals, UP(local));
+
+ if (PM_NODE_LENGTH(node) == 0) {
+ PM_NODE_START_SET_NODE(node, local);
+ }
- if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
- node->base.location.end = local->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, local);
}
/**
@@ -2601,66 +2884,55 @@ pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm
static pm_break_node_t *
pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
- pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
- *node = (pm_break_node_t) {
- {
- .type = PM_BREAK_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- },
- },
- .arguments = arguments,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
-
- return node;
+ return pm_break_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+ arguments,
+ TOK2LOC(parser, keyword)
+ );
}
// There are certain flags that we want to use internally but don't want to
// expose because they are not relevant beyond parsing. Therefore we'll define
// them here and not define them in config.yml/a header file.
-static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
-static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
+static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
+
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
+static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
/**
- * Allocate and initialize a new CallNode node. This sets everything to NULL or
- * PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden
- * in the various specializations of this function.
+ * Allocate and initialize a new CallNode node. This sets everything to NULL
+ * such that its values can be overridden in the various specializations of this
+ * function.
*/
static pm_call_node_t *
pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
- pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
-
- *node = (pm_call_node_t) {
- {
- .type = PM_CALL_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser),
- },
- .receiver = NULL,
- .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .arguments = NULL,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .block = NULL,
- .name = 0
- };
-
- return node;
+ return pm_call_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_UNSET,
+ NULL,
+ ((pm_location_t) { 0 }),
+ 0,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL
+ );
}
/**
* Returns the value that the ignore visibility flag should be set to for the
* given receiver.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
}
@@ -2680,12 +2952,15 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
pm_call_node_t *node = pm_call_node_create(parser, flags);
- node->base.location.start = receiver->location.start;
- node->base.location.end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_NODE(node, receiver);
+
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
node->receiver = receiver;
node->message_loc.start = arguments->opening_loc.start;
- node->message_loc.end = arguments->closing_loc.end;
+ node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
@@ -2706,20 +2981,22 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
- node->base.location.start = MIN(receiver->location.start, argument->location.start);
- node->base.location.end = MAX(receiver->location.end, argument->location.end);
+ PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
+ PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
node->receiver = receiver;
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+ node->message_loc = TOK2LOC(parser, operator);
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
- pm_arguments_node_arguments_append(arguments, argument);
+ pm_arguments_node_arguments_append(parser->arena, arguments, argument);
node->arguments = arguments;
node->name = pm_parser_constant_id_token(parser, operator);
return node;
}
+static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
+
/**
* Allocate and initialize a new CallNode node from a call expression.
*/
@@ -2729,26 +3006,31 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = receiver->location.start;
- const uint8_t *end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_NODE(node, receiver);
+ const pm_location_t *end = pm_arguments_end(arguments);
if (end == NULL) {
- end = message->end;
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
+ } else {
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
}
- node->base.location.end = end;
node->receiver = receiver;
- node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->call_operator_loc = TOK2LOC(parser, operator);
+ node->message_loc = TOK2LOC(parser, message);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
node->block = arguments->block;
if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
- pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+ pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
}
- node->name = pm_parser_constant_id_token(parser, message);
+ /**
+ * If the final character is `@` as is the case for `foo.~@`,
+ * we should ignore the @ in the same way we do for symbols.
+ */
+ node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
return node;
}
@@ -2758,12 +3040,9 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
static pm_call_node_t *
pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
pm_call_node_t *node = pm_call_node_create(parser, 0);
- node->base.location.start = parser->start;
- node->base.location.end = parser->end;
+ node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
node->receiver = receiver;
- node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
- node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
node->arguments = arguments;
node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
@@ -2778,10 +3057,12 @@ static pm_call_node_t *
pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location.start = message->start;
- node->base.location.end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_TOKEN(parser, node, message);
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->message_loc = TOK2LOC(parser, message);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
@@ -2799,7 +3080,7 @@ static pm_call_node_t *
pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location = PM_LOCATION_NULL_VALUE(parser);
+ node->base.location = (pm_location_t) { 0 };
node->arguments = arguments;
node->name = name;
@@ -2816,16 +3097,16 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = message->start;
- if (arguments->closing_loc.start != NULL) {
- node->base.location.end = arguments->closing_loc.end;
+ PM_NODE_START_SET_TOKEN(parser, node, message);
+ if (arguments->closing_loc.length > 0) {
+ PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
} else {
assert(receiver != NULL);
- node->base.location.end = receiver->location.end;
+ PM_NODE_LENGTH_SET_NODE(node, receiver);
}
node->receiver = receiver;
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->message_loc = TOK2LOC(parser, message);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
@@ -2843,18 +3124,20 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = receiver->location.start;
- node->base.location.end = pm_arguments_end(arguments);
+ PM_NODE_START_SET_NODE(node, receiver);
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+ PM_NODE_LENGTH_SET_LOCATION(node, end);
node->receiver = receiver;
- node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+ node->call_operator_loc = TOK2LOC(parser, operator);
node->opening_loc = arguments->opening_loc;
node->arguments = arguments->arguments;
node->closing_loc = arguments->closing_loc;
node->block = arguments->block;
if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
- pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
+ pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
}
node->name = pm_parser_constant_id_constant(parser, "call", 4);
@@ -2870,11 +3153,11 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
- node->base.location.start = operator->start;
- node->base.location.end = receiver->location.end;
+ PM_NODE_START_SET_TOKEN(parser, node, operator);
+ PM_NODE_LENGTH_SET_NODE(node, receiver);
node->receiver = receiver;
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+ node->message_loc = TOK2LOC(parser, operator);
node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
return node;
@@ -2888,8 +3171,8 @@ static pm_call_node_t *
pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
- node->base.location = PM_LOCATION_TOKEN_VALUE(message);
- node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
+ node->base.location = TOK2LOC(parser, message);
+ node->message_loc = TOK2LOC(parser, message);
node->name = pm_parser_constant_id_token(parser, message);
return node;
@@ -2899,14 +3182,14 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
* Returns whether or not this call can be used on the left-hand side of an
* operator assignment.
*/
-static inline bool
+static PRISM_INLINE bool
pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
return (
- (node->message_loc.start != NULL) &&
- (node->message_loc.end[-1] != '!') &&
- (node->message_loc.end[-1] != '?') &&
- char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) &&
- (node->opening_loc.start == NULL) &&
+ (node->message_loc.length > 0) &&
+ (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
+ (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
+ char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
+ (node->opening_loc.length == 0) &&
(node->arguments == NULL) &&
(node->block == NULL)
);
@@ -2922,10 +3205,10 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p
if (write_constant->length > 0) {
size_t length = write_constant->length - 1;
- void *memory = xmalloc(length);
+ uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
memcpy(memory, write_constant->start, length);
- *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
+ *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
} else {
// We can get here if the message was missing because of a syntax error.
*read_name = pm_parser_constant_id_constant(parser, "", 0);
@@ -2939,33 +3222,25 @@ static pm_call_and_write_node_t *
pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
- *node = (pm_call_and_write_node_t) {
- {
- .type = PM_CALL_AND_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .message_loc = target->message_loc,
- .read_name = 0,
- .write_name = target->name,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
+ pm_call_and_write_node_t *node = pm_call_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->message_loc,
+ 0,
+ target->name,
+ TOK2LOC(parser, operator),
+ value
+ );
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -2976,7 +3251,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
*/
static void
pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
- if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
pm_node_t *node;
PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
@@ -2999,35 +3274,28 @@ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *argumen
static pm_index_and_write_node_t *
pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
pm_index_arguments_check(parser, target->arguments, target->block);
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_and_write_node_t) {
- {
- .type = PM_INDEX_AND_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_and_write_node_t *node = pm_index_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block,
+ TOK2LOC(parser, operator),
+ value
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3038,34 +3306,26 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons
static pm_call_operator_write_node_t *
pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
- pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
- *node = (pm_call_operator_write_node_t) {
- {
- .type = PM_CALL_OPERATOR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .message_loc = target->message_loc,
- .read_name = 0,
- .write_name = target->name,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
+ pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->message_loc,
+ 0,
+ target->name,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+ TOK2LOC(parser, operator),
+ value
+ );
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3075,36 +3335,28 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
*/
static pm_index_operator_write_node_t *
pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
-
pm_index_arguments_check(parser, target->arguments, target->block);
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_operator_write_node_t) {
- {
- .type = PM_INDEX_OPERATOR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+ TOK2LOC(parser, operator),
+ value
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3116,33 +3368,25 @@ static pm_call_or_write_node_t *
pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(target->block == NULL);
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
- *node = (pm_call_or_write_node_t) {
- {
- .type = PM_CALL_OR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .message_loc = target->message_loc,
- .read_name = 0,
- .write_name = target->name,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
+ pm_call_or_write_node_t *node = pm_call_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->message_loc,
+ 0,
+ target->name,
+ TOK2LOC(parser, operator),
+ value
+ );
pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3153,35 +3397,28 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
static pm_index_or_write_node_t *
pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
pm_index_arguments_check(parser, target->arguments, target->block);
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_or_write_node_t) {
- {
- .type = PM_INDEX_OR_WRITE_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_or_write_node_t *node = pm_index_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->receiver,
+ target->call_operator_loc,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block,
+ TOK2LOC(parser, operator),
+ value
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3192,25 +3429,27 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
*/
static pm_call_target_node_t *
pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
- pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
+ pm_call_target_node_t *node = pm_call_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target),
+ PM_LOCATION_INIT_NODE(target),
+ target->receiver,
+ target->call_operator_loc,
+ target->name,
+ target->message_loc
+ );
- *node = (pm_call_target_node_t) {
- {
- .type = PM_CALL_TARGET_NODE,
- .flags = target->base.flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = target->base.location
- },
- .receiver = target->receiver,
- .call_operator_loc = target->call_operator_loc,
- .name = target->name,
- .message_loc = target->message_loc
- };
+ /* It is possible to get here where we have parsed an invalid syntax tree
+ * where the call operator was not present. In that case we will have a
+ * problem because it is a required location. In this case we need to fill
+ * it in with a fake location so that the syntax tree remains valid. */
+ if (node->call_operator_loc.length == 0) {
+ node->call_operator_loc = target->base.location;
+ }
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3221,30 +3460,23 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
*/
static pm_index_target_node_t *
pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
- pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
- pm_node_flags_t flags = target->base.flags;
-
pm_index_arguments_check(parser, target->arguments, target->block);
-
assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
- *node = (pm_index_target_node_t) {
- {
- .type = PM_INDEX_TARGET_NODE,
- .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = target->base.location
- },
- .receiver = target->receiver,
- .opening_loc = target->opening_loc,
- .arguments = target->arguments,
- .closing_loc = target->closing_loc,
- .block = (pm_block_argument_node_t *) target->block,
- };
- // Here we're going to free the target, since it is no longer necessary.
- // However, we don't want to call `pm_node_destroy` because we want to keep
- // around all of its children since we just reused them.
- xfree(target);
+ pm_index_target_node_t *node = pm_index_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
+ PM_LOCATION_INIT_NODE(target),
+ target->receiver,
+ target->opening_loc,
+ target->arguments,
+ target->closing_loc,
+ (pm_block_argument_node_t *) target->block
+ );
+
+ // The target is no longer necessary because we've reused its children.
+ // It is arena-allocated so no explicit free is needed.
return node;
}
@@ -3254,23 +3486,15 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
*/
static pm_capture_pattern_node_t *
pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
- pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
-
- *node = (pm_capture_pattern_node_t) {
- {
- .type = PM_CAPTURE_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = value->location.start,
- .end = target->base.location.end
- },
- },
- .value = value,
- .target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_capture_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(value, target),
+ value,
+ target,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -3278,36 +3502,28 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_v
*/
static pm_case_node_t *
pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
- pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
-
- *node = (pm_case_node_t) {
- {
- .type = PM_CASE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = case_keyword->start,
- .end = end_keyword->end
- },
- },
- .predicate = predicate,
- .else_clause = NULL,
- .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .conditions = { 0 }
- };
-
- return node;
+ return pm_case_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
+ predicate,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ TOK2LOC(parser, case_keyword),
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
* Append a new condition to a CaseNode node.
*/
static void
-pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
+pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
- pm_node_list_append(&node->conditions, condition);
- node->base.location.end = condition->location.end;
+ pm_node_list_append(arena, &node->conditions, condition);
+ PM_NODE_LENGTH_SET_NODE(node, condition);
}
/**
@@ -3316,53 +3532,45 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
static void
pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
node->else_clause = else_clause;
- node->base.location.end = else_clause->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, else_clause);
}
/**
* Set the end location for a CaseNode node.
*/
static void
-pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
- node->base.location.end = end_keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
}
/**
* Allocate and initialize a new CaseMatchNode node.
*/
static pm_case_match_node_t *
-pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
- pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
-
- *node = (pm_case_match_node_t) {
- {
- .type = PM_CASE_MATCH_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = case_keyword->start,
- .end = end_keyword->end
- },
- },
- .predicate = predicate,
- .else_clause = NULL,
- .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .conditions = { 0 }
- };
-
- return node;
+pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
+ return pm_case_match_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, case_keyword),
+ predicate,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ TOK2LOC(parser, case_keyword),
+ ((pm_location_t) { 0 })
+ );
}
/**
* Append a new condition to a CaseMatchNode node.
*/
static void
-pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
+pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
- pm_node_list_append(&node->conditions, condition);
- node->base.location.end = condition->location.end;
+ pm_node_list_append(arena, &node->conditions, condition);
+ PM_NODE_LENGTH_SET_NODE(node, condition);
}
/**
@@ -3371,16 +3579,16 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi
static void
pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
node->else_clause = else_clause;
- node->base.location.end = else_clause->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, else_clause);
}
/**
* Set the end location for a CaseMatchNode node.
*/
static void
-pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
- node->base.location.end = end_keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
+pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
}
/**
@@ -3388,25 +3596,20 @@ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_toke
*/
static pm_class_node_t *
pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
-
- *node = (pm_class_node_t) {
- {
- .type = PM_CLASS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = class_keyword->start, .end = end_keyword->end },
- },
- .locals = *locals,
- .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
- .constant_path = constant_path,
- .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
- .superclass = superclass,
- .body = body,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_class_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
+ *locals,
+ TOK2LOC(parser, class_keyword),
+ constant_path,
+ NTOK2LOC(parser, inheritance_operator),
+ superclass,
+ body,
+ TOK2LOC(parser, end_keyword),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -3415,24 +3618,17 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p
static pm_class_variable_and_write_node_t *
pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
- *node = (pm_class_variable_and_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_class_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3440,25 +3636,17 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r
*/
static pm_class_variable_operator_write_node_t *
pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
-
- *node = (pm_class_variable_operator_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_class_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -3467,24 +3655,17 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia
static pm_class_variable_or_write_node_t *
pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
-
- *node = (pm_class_variable_or_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_class_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3493,18 +3674,14 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re
static pm_class_variable_read_node_t *
pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_CLASS_VARIABLE);
- pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
-
- *node = (pm_class_variable_read_node_t) {
- {
- .type = PM_CLASS_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .name = pm_parser_constant_id_token(parser, token)
- };
- return node;
+ return pm_class_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_parser_constant_id_token(parser, token)
+ );
}
/**
@@ -3513,9 +3690,9 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token)
* a = *b
* a = 1, 2, 3
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
- if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
+ if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
return flags;
}
return 0;
@@ -3526,25 +3703,16 @@ pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
*/
static pm_class_variable_write_node_t *
pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
- pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
-
- *node = (pm_class_variable_write_node_t) {
- {
- .type = PM_CLASS_VARIABLE_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = read_node->base.location.start,
- .end = value->location.end
- },
- },
- .name = read_node->name,
- .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_class_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(read_node, value),
+ read_node->name,
+ read_node->base.location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -3553,23 +3721,16 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_
static pm_constant_path_and_write_node_t *
pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
-
- *node = (pm_constant_path_and_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_constant_path_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3577,24 +3738,16 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod
*/
static pm_constant_path_operator_write_node_t *
pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
-
- *node = (pm_constant_path_operator_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .target = target,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_constant_path_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -3603,23 +3756,16 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat
static pm_constant_path_or_write_node_t *
pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
- *node = (pm_constant_path_or_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .target = target,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_path_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3628,29 +3774,22 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node
static pm_constant_path_node_t *
pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
pm_assert_value_expression(parser, parent);
- pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
if (name_token->type == PM_TOKEN_CONSTANT) {
name = pm_parser_constant_id_token(parser, name_token);
}
- *node = (pm_constant_path_node_t) {
- {
- .type = PM_CONSTANT_PATH_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = parent == NULL ? delimiter->start : parent->location.start,
- .end = name_token->end
- },
- },
- .parent = parent,
- .name = name,
- .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
- };
-
- return node;
+ return pm_constant_path_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
+ parent,
+ name,
+ TOK2LOC(parser, delimiter),
+ TOK2LOC(parser, name_token)
+ );
}
/**
@@ -3658,24 +3797,15 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to
*/
static pm_constant_path_write_node_t *
pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
-
- *node = (pm_constant_path_write_node_t) {
- {
- .type = PM_CONSTANT_PATH_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- },
- },
- .target = target,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_path_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ target,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3684,24 +3814,17 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t
static pm_constant_and_write_node_t *
pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
- *node = (pm_constant_and_write_node_t) {
- {
- .type = PM_CONSTANT_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3709,25 +3832,17 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *
*/
static pm_constant_operator_write_node_t *
pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
-
- *node = (pm_constant_operator_write_node_t) {
- {
- .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_constant_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -3736,24 +3851,17 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod
static pm_constant_or_write_node_t *
pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
-
- *node = (pm_constant_or_write_node_t) {
- {
- .type = PM_CONSTANT_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_constant_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -3761,19 +3869,15 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t
*/
static pm_constant_read_node_t *
pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
- pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
-
- *node = (pm_constant_read_node_t) {
- {
- .type = PM_CONSTANT_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name)
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
+
+ return pm_constant_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -3781,25 +3885,16 @@ pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
*/
static pm_constant_write_node_t *
pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
-
- *node = (pm_constant_write_node_t) {
- {
- .type = PM_CONSTANT_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_constant_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -3810,7 +3905,7 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_BEGIN_NODE: {
const pm_begin_node_t *cast = (pm_begin_node_t *) node;
- if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
+ if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
break;
}
case PM_PARENTHESES_NODE: {
@@ -3865,65 +3960,45 @@ pm_def_node_create(
const pm_token_t *equal,
const pm_token_t *end_keyword
) {
- pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
- const uint8_t *end;
-
- if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
- end = body->location.end;
- } else {
- end = end_keyword->end;
- }
-
if (receiver != NULL) {
pm_def_node_receiver_check(parser, receiver);
}
- *node = (pm_def_node_t) {
- {
- .type = PM_DEF_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = def_keyword->start, .end = end },
- },
- .name = name,
- .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
- .receiver = receiver,
- .parameters = parameters,
- .body = body,
- .locals = *locals,
- .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
- .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
- .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
- .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_def_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
+ name,
+ TOK2LOC(parser, name_loc),
+ receiver,
+ parameters,
+ body,
+ *locals,
+ TOK2LOC(parser, def_keyword),
+ NTOK2LOC(parser, operator),
+ NTOK2LOC(parser, lparen),
+ NTOK2LOC(parser, rparen),
+ NTOK2LOC(parser, equal),
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
* Allocate a new DefinedNode node.
*/
static pm_defined_node_t *
-pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
- pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
-
- *node = (pm_defined_node_t) {
- {
- .type = PM_DEFINED_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword_loc->start,
- .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
- },
- },
- .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
- .value = value,
- .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
- .keyword_loc = *keyword_loc
- };
-
- return node;
+pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
+ return pm_defined_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
+ NTOK2LOC(parser, lparen),
+ value,
+ NTOK2LOC(parser, rparen),
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -3931,29 +4006,15 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t
*/
static pm_else_node_t *
pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
- pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
- const uint8_t *end = NULL;
- if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
- end = statements->base.location.end;
- } else {
- end = end_keyword->end;
- }
-
- *node = (pm_else_node_t) {
- {
- .type = PM_ELSE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = else_keyword->start,
- .end = end,
- },
- },
- .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
- .statements = statements,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_else_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
+ TOK2LOC(parser, else_keyword),
+ statements,
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -3961,23 +4022,15 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat
*/
static pm_embedded_statements_node_t *
pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
-
- *node = (pm_embedded_statements_node_t) {
- {
- .type = PM_EMBEDDED_STATEMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .statements = statements,
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_embedded_statements_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ statements,
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -3985,22 +4038,14 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin
*/
static pm_embedded_variable_node_t *
pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
- pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
-
- *node = (pm_embedded_variable_node_t) {
- {
- .type = PM_EMBEDDED_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = variable->location.end
- }
- },
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .variable = variable
- };
-
- return node;
+ return pm_embedded_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
+ TOK2LOC(parser, operator),
+ variable
+ );
}
/**
@@ -4008,23 +4053,15 @@ pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator
*/
static pm_ensure_node_t *
pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
- pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
-
- *node = (pm_ensure_node_t) {
- {
- .type = PM_ENSURE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = ensure_keyword->start,
- .end = end_keyword->end
- },
- },
- .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
- .statements = statements,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_ensure_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
+ TOK2LOC(parser, ensure_keyword),
+ statements,
+ TOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -4033,16 +4070,13 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_
static pm_false_node_t *
pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_FALSE);
- pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
- *node = (pm_false_node_t) {{
- .type = PM_FALSE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_false_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4051,50 +4085,31 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_find_pattern_node_t *
pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
- pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
-
+ assert(nodes->size >= 2);
pm_node_t *left = nodes->nodes[0];
- assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
- pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
-
- pm_node_t *right;
+ pm_node_t *right = nodes->nodes[nodes->size - 1];
- if (nodes->size == 1) {
- right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
- } else {
- right = nodes->nodes[nodes->size - 1];
- assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
- }
-
-#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
- // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
- // The resulting AST will anyway be ignored, but this file still needs to compile.
- pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
-#else
- pm_node_t *right_splat_node = right;
-#endif
- *node = (pm_find_pattern_node_t) {
- {
- .type = PM_FIND_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end,
- },
- },
- .constant = NULL,
- .left = left_splat_node,
- .right = right_splat_node,
- .requireds = { 0 },
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
+ assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
+ assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
+
+ pm_find_pattern_node_t *node = pm_find_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ NULL,
+ (pm_splat_node_t *) left,
+ ((pm_node_list_t) { 0 }),
+ (pm_splat_node_t *) right,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
// For now we're going to just copy over each pointer manually. This could be
// much more efficient, as we could instead resize the node list to only point
// to 1...-1.
for (size_t index = 1; index < nodes->size - 1; index++) {
- pm_node_list_append(&node->requireds, nodes->nodes[index]);
+ pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
}
return node;
@@ -4111,7 +4126,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
// First, get a buffer of the content.
size_t length = (size_t) diff;
- char *buffer = xmalloc(sizeof(char) * (length + 1));
+ const size_t buffer_size = sizeof(char) * (length + 1);
+ char *buffer = xmalloc(buffer_size);
memcpy((void *) buffer, token->start, length);
// Next, determine if we need to replace the decimal point because of
@@ -4145,8 +4161,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
// This should never happen, because we've already checked that the token
// is in a valid format. However it's good to be safe.
if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
- xfree((void *) buffer);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
+ xfree_sized(buffer, buffer_size);
return 0.0;
}
@@ -4164,12 +4180,12 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
ellipsis = "";
}
- pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
+ pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
}
// Finally we can free the buffer and return the value.
- xfree((void *) buffer);
+ xfree_sized(buffer, buffer_size);
return value;
}
@@ -4179,19 +4195,14 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
static pm_float_node_t *
pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT);
- pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
- *node = (pm_float_node_t) {
- {
- .type = PM_FLOAT_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .value = pm_double_parse(parser, token)
- };
-
- return node;
+ return pm_float_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_double_parse(parser, token)
+ );
}
/**
@@ -4201,22 +4212,17 @@ static pm_imaginary_node_t *
pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_float_node_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4226,17 +4232,14 @@ static pm_rational_node_t *
pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
- pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
- *node = (pm_rational_node_t) {
- {
- .type = PM_RATIONAL_NODE,
- .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numerator = { 0 },
- .denominator = { 0 }
- };
+ pm_rational_node_t *node = pm_rational_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_integer_t) { 0 }),
+ ((pm_integer_t) { 0 })
+ );
const uint8_t *start = token->start;
const uint8_t *end = token->end - 1; // r
@@ -4263,12 +4266,18 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
+ size_t fract_length = 0;
+ for (const uint8_t *fract = point; fract < end; ++fract) {
+ if (*fract != '_') ++fract_length;
+ }
digits[0] = '1';
- if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
- pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
- xfree(digits);
+ if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
+ pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
+ xfree_sized(digits, length);
pm_integers_reduce(&node->numerator, &node->denominator);
+ pm_integer_arena_move(parser->arena, &node->numerator);
+ pm_integer_arena_move(parser->arena, &node->denominator);
return node;
}
@@ -4280,22 +4289,17 @@ static pm_imaginary_node_t *
pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_float_node_rational_create(parser, &((pm_token_t) {
.type = PM_TOKEN_FLOAT_RATIONAL,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4312,27 +4316,19 @@ pm_for_node_create(
const pm_token_t *do_keyword,
const pm_token_t *end_keyword
) {
- pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
-
- *node = (pm_for_node_t) {
- {
- .type = PM_FOR_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = for_keyword->start,
- .end = end_keyword->end
- },
- },
- .index = index,
- .collection = collection,
- .statements = statements,
- .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
- .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_for_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
+ index,
+ collection,
+ statements,
+ TOK2LOC(parser, for_keyword),
+ TOK2LOC(parser, in_keyword),
+ NTOK2LOC(parser, do_keyword),
+ TOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -4341,15 +4337,13 @@ pm_for_node_create(
static pm_forwarding_arguments_node_t *
pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
- pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
-
- *node = (pm_forwarding_arguments_node_t) {{
- .type = PM_FORWARDING_ARGUMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_forwarding_arguments_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4358,15 +4352,13 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token
static pm_forwarding_parameter_node_t *
pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
- pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
-
- *node = (pm_forwarding_parameter_node_t) {{
- .type = PM_FORWARDING_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_forwarding_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4376,26 +4368,20 @@ static pm_forwarding_super_node_t *
pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
assert(token->type == PM_TOKEN_KEYWORD_SUPER);
- pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
pm_block_node_t *block = NULL;
if (arguments->block != NULL) {
block = (pm_block_node_t *) arguments->block;
}
- *node = (pm_forwarding_super_node_t) {
- {
- .type = PM_FORWARDING_SUPER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = token->start,
- .end = block != NULL ? block->base.location.end : token->end
- },
- },
- .block = block
- };
-
- return node;
+ return pm_forwarding_super_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ block
+ );
}
/**
@@ -4404,25 +4390,17 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm
*/
static pm_hash_pattern_node_t *
pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
-
- *node = (pm_hash_pattern_node_t) {
- {
- .type = PM_HASH_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .constant = NULL,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .elements = { 0 },
- .rest = NULL
- };
-
- return node;
+ return pm_hash_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -4430,46 +4408,36 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening
*/
static pm_hash_pattern_node_t *
pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
- pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
-
- const uint8_t *start;
- const uint8_t *end;
+ uint32_t start;
+ uint32_t end;
if (elements->size > 0) {
if (rest) {
- start = elements->nodes[0]->location.start;
- end = rest->location.end;
+ start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
+ end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
} else {
- start = elements->nodes[0]->location.start;
- end = elements->nodes[elements->size - 1]->location.end;
+ start = PM_NODE_START(elements->nodes[0]);
+ end = PM_NODE_END(elements->nodes[elements->size - 1]);
}
} else {
assert(rest != NULL);
- start = rest->location.start;
- end = rest->location.end;
- }
-
- *node = (pm_hash_pattern_node_t) {
- {
- .type = PM_HASH_PATTERN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = start,
- .end = end
- },
- },
- .constant = NULL,
- .elements = { 0 },
- .rest = rest,
- .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- pm_node_t *element;
- PM_NODE_LIST_FOREACH(elements, index, element) {
- pm_node_list_append(&node->elements, element);
- }
+ start = PM_NODE_START(rest);
+ end = PM_NODE_END(rest);
+ }
+
+ pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ rest,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
+ pm_node_list_concat(parser->arena, &node->elements, elements);
return node;
}
@@ -4486,7 +4454,7 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
case PM_NUMBERED_REFERENCE_READ_NODE:
// This will only ever happen in the event of a syntax error, but we
// still need to provide something for the node.
- return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+ return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
default:
assert(false && "unreachable");
return (pm_constant_id_t) -1;
@@ -4499,24 +4467,17 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
static pm_global_variable_and_write_node_t *
pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
-
- *node = (pm_global_variable_and_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_global_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -4524,25 +4485,17 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_global_variable_operator_write_node_t *
pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
-
- *node = (pm_global_variable_operator_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = target->location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_global_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -4551,24 +4504,17 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta
static pm_global_variable_or_write_node_t *
pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
- *node = (pm_global_variable_or_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_global_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -4576,18 +4522,13 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_global_variable_read_node_t *
pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
-
- *node = (pm_global_variable_read_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_global_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -4595,18 +4536,13 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name)
*/
static pm_global_variable_read_node_t *
pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
- pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
-
- *node = (pm_global_variable_read_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .name = name
- };
-
- return node;
+ return pm_global_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ name
+ );
}
/**
@@ -4614,25 +4550,16 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant
*/
static pm_global_variable_write_node_t *
pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
-
- *node = (pm_global_variable_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- },
- },
- .name = pm_global_variable_write_name(parser, target),
- .name_loc = PM_LOCATION_NODE_VALUE(target),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_global_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ pm_global_variable_write_name(parser, target),
+ target->location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -4640,21 +4567,16 @@ pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con
*/
static pm_global_variable_write_node_t *
pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
- pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
-
- *node = (pm_global_variable_write_node_t) {
- {
- .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .name = name,
- .name_loc = PM_LOCATION_NULL_VALUE(parser),
- .operator_loc = PM_LOCATION_NULL_VALUE(parser),
- .value = value
- };
-
- return node;
+ return pm_global_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ name,
+ ((pm_location_t) { 0 }),
+ value,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -4663,29 +4585,24 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan
static pm_hash_node_t *
pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
assert(opening != NULL);
- pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
-
- *node = (pm_hash_node_t) {
- {
- .type = PM_HASH_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(opening)
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_NULL_VALUE(parser),
- .elements = { 0 }
- };
- return node;
+ return pm_hash_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, opening),
+ TOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
}
/**
* Append a new element to a hash node.
*/
-static inline void
-pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
- pm_node_list_append(&hash->elements, element);
+static PRISM_INLINE void
+pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
+ pm_node_list_append(arena, &hash->elements, element);
bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
if (static_literal) {
@@ -4696,14 +4613,14 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
}
if (!static_literal) {
- pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
}
}
-static inline void
-pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
- hash->base.location.end = token->end;
- hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
+static PRISM_INLINE void
+pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
+ hash->closing_loc = TOK2LOC(parser, token);
}
/**
@@ -4719,38 +4636,32 @@ pm_if_node_create(pm_parser_t *parser,
const pm_token_t *end_keyword
) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
- const uint8_t *end;
- if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
- end = end_keyword->end;
+ uint32_t start = PM_TOKEN_START(parser, if_keyword);
+ uint32_t end;
+
+ if (end_keyword != NULL) {
+ end = PM_TOKEN_END(parser, end_keyword);
} else if (subsequent != NULL) {
- end = subsequent->location.end;
+ end = PM_NODE_END(subsequent);
} else if (pm_statements_node_body_length(statements) != 0) {
- end = statements->base.location.end;
+ end = PM_NODE_END(statements);
} else {
- end = predicate->location.end;
- }
-
- *node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = if_keyword->start,
- .end = end
- },
- },
- .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
- .statements = statements,
- .subsequent = subsequent,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ end = PM_NODE_END(predicate);
+ }
+
+ return pm_if_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ TOK2LOC(parser, if_keyword),
+ predicate,
+ NTOK2LOC(parser, then_keyword),
+ statements,
+ subsequent,
+ NTOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -4759,30 +4670,22 @@ pm_if_node_create(pm_parser_t *parser,
static pm_if_node_t *
pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, statement, true);
- *node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statement->location.start,
- .end = predicate->location.end
- },
- },
- .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .statements = statements,
- .subsequent = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_if_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_NODES(statement, predicate),
+ TOK2LOC(parser, if_keyword),
+ predicate,
+ ((pm_location_t) { 0 }),
+ statements,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -4799,43 +4702,31 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to
pm_statements_node_t *else_statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, else_statements, false_expression, true);
- pm_token_t end_keyword = not_provided(parser);
- pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
-
- pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
-
- *node = (pm_if_node_t) {
- {
- .type = PM_IF_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = predicate->location.start,
- .end = false_expression->location.end,
- },
- },
- .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .predicate = predicate,
- .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
- .statements = if_statements,
- .subsequent = (pm_node_t *) else_node,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
-
+ pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
+ return pm_if_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_NODES(predicate, false_expression),
+ ((pm_location_t) { 0 }),
+ predicate,
+ TOK2LOC(parser, qmark),
+ if_statements,
+ UP(else_node),
+ ((pm_location_t) { 0 })
+ );
}
-static inline void
-pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
- node->base.location.end = keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+static PRISM_INLINE void
+pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
+ node->end_keyword_loc = TOK2LOC(parser, keyword);
}
-static inline void
-pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
- node->base.location.end = keyword->end;
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
+static PRISM_INLINE void
+pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
+ node->end_keyword_loc = TOK2LOC(parser, keyword);
}
/**
@@ -4843,18 +4734,13 @@ pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword
*/
static pm_implicit_node_t *
pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
- pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
-
- *node = (pm_implicit_node_t) {
- {
- .type = PM_IMPLICIT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = value->location
- },
- .value = value
- };
-
- return node;
+ return pm_implicit_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(value),
+ value
+ );
}
/**
@@ -4864,17 +4750,12 @@ static pm_implicit_rest_node_t *
pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_COMMA);
- pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
-
- *node = (pm_implicit_rest_node_t) {
- {
- .type = PM_IMPLICIT_REST_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }
- };
-
- return node;
+ return pm_implicit_rest_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -4883,28 +4764,33 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
static pm_integer_node_t *
pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER);
- pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
- *node = (pm_integer_node_t) {
- {
- .type = PM_INTEGER_NODE,
- .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .value = { 0 }
- };
+ pm_integer_node_t *node = pm_integer_node_new(
+ parser->arena,
+ ++parser->node_id,
+ base | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_integer_t) { 0 })
+ );
- pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
- switch (base) {
- case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
- case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
- case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
- case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
- default: assert(false && "unreachable"); break;
+ if (parser->integer.lexed) {
+ // The value was already computed during lexing.
+ node->value.value = parser->integer.value;
+ parser->integer.lexed = false;
+ } else {
+ pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
+ switch (base) {
+ case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
+ case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
+ case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
+ case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
+ default: assert(false && "unreachable"); break;
+ }
+
+ pm_integer_parse(&node->value, integer_base, token->start, token->end);
+ pm_integer_arena_move(parser->arena, &node->value);
}
- pm_integer_parse(&node->value, integer_base, token->start, token->end);
return node;
}
@@ -4916,22 +4802,17 @@ static pm_imaginary_node_t *
pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_integer_node_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4942,17 +4823,14 @@ static pm_rational_node_t *
pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
- pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
- *node = (pm_rational_node_t) {
- {
- .type = PM_RATIONAL_NODE,
- .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numerator = { 0 },
- .denominator = { .value = 1, 0 }
- };
+ pm_rational_node_t *node = pm_rational_node_new(
+ parser->arena,
+ ++parser->node_id,
+ base | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_integer_t) { 0 }),
+ ((pm_integer_t) { .value = 1 })
+ );
pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
switch (base) {
@@ -4964,6 +4842,7 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const
}
pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
+ pm_integer_arena_move(parser->arena, &node->numerator);
return node;
}
@@ -4976,22 +4855,17 @@ static pm_imaginary_node_t *
pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
- pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
- *node = (pm_imaginary_node_t) {
- {
- .type = PM_IMAGINARY_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
+ return pm_imaginary_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
.type = PM_TOKEN_INTEGER_RATIONAL,
.start = token->start,
.end = token->end - 1
- }))
- };
-
- return node;
+ })))
+ );
}
/**
@@ -4999,33 +4873,27 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b
*/
static pm_in_node_t *
pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
- pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
+ uint32_t start = PM_TOKEN_START(parser, in_keyword);
+ uint32_t end;
- const uint8_t *end;
if (statements != NULL) {
- end = statements->base.location.end;
- } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
- end = then_keyword->end;
+ end = PM_NODE_END(statements);
+ } else if (then_keyword != NULL) {
+ end = PM_TOKEN_END(parser, then_keyword);
} else {
- end = pattern->location.end;
- }
-
- *node = (pm_in_node_t) {
- {
- .type = PM_IN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = in_keyword->start,
- .end = end
- },
- },
- .pattern = pattern,
- .statements = statements,
- .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
- .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
- };
-
- return node;
+ end = PM_NODE_END(pattern);
+ }
+
+ return pm_in_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ pattern,
+ statements,
+ TOK2LOC(parser, in_keyword),
+ NTOK2LOC(parser, then_keyword)
+ );
}
/**
@@ -5034,24 +4902,17 @@ pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t
static pm_instance_variable_and_write_node_t *
pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
- *node = (pm_instance_variable_and_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_instance_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -5059,25 +4920,17 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari
*/
static pm_instance_variable_operator_write_node_t *
pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
-
- *node = (pm_instance_variable_operator_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
- };
-
- return node;
+ return pm_instance_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
+ );
}
/**
@@ -5086,24 +4939,17 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance
static pm_instance_variable_or_write_node_t *
pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
-
- *node = (pm_instance_variable_or_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .name = target->name,
- .name_loc = target->base.location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
- return node;
+ return pm_instance_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->name,
+ target->base.location,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -5112,18 +4958,14 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia
static pm_instance_variable_read_node_t *
pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
- pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
-
- *node = (pm_instance_variable_read_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .name = pm_parser_constant_id_token(parser, token)
- };
- return node;
+ return pm_instance_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_parser_constant_id_token(parser, token)
+ );
}
/**
@@ -5132,24 +4974,16 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok
*/
static pm_instance_variable_write_node_t *
pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
- pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
- *node = (pm_instance_variable_write_node_t) {
- {
- .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = read_node->base.location.start,
- .end = value->location.end
- }
- },
- .name = read_node->name,
- .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_instance_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(read_node, value),
+ read_node->name,
+ read_node->base.location,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5158,7 +4992,7 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable
* literals.
*/
static void
-pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
+pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
switch (PM_NODE_TYPE(part)) {
case PM_STRING_NODE:
pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
@@ -5186,14 +5020,14 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p
break;
}
case PM_EMBEDDED_VARIABLE_NODE:
- pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
break;
default:
assert(false && "unexpected node type");
break;
}
- pm_node_list_append(parts, part);
+ pm_node_list_append(arena, parts, part);
}
/**
@@ -5201,43 +5035,34 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p
*/
static pm_interpolated_regular_expression_node_t *
pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
- pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
-
- *node = (pm_interpolated_regular_expression_node_t) {
- {
- .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = NULL,
- },
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .parts = { 0 }
- };
-
- return node;
+ return pm_interpolated_regular_expression_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, opening),
+ TOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, opening)
+ );
}
-static inline void
-pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
- if (node->base.location.start > part->location.start) {
- node->base.location.start = part->location.start;
+static PRISM_INLINE void
+pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
+ if (PM_NODE_START(node) > PM_NODE_START(part)) {
+ PM_NODE_START_SET_NODE(node, part);
}
- if (node->base.location.end < part->location.end) {
- node->base.location.end = part->location.end;
+ if (PM_NODE_END(node) < PM_NODE_END(part)) {
+ PM_NODE_LENGTH_SET_NODE(node, part);
}
- pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
+ pm_interpolated_node_append(arena, UP(node), &node->parts, part);
}
-static inline void
+static PRISM_INLINE void
pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
- pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
+ pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
}
/**
@@ -5249,7 +5074,7 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte
* PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a
* single static literal string that can be pushed onto the stack on its own.
* Note that this doesn't necessarily mean that the string will be frozen or
- * not; the instructions in CRuby will be either putobject or putstring,
+ * not; the instructions in CRuby will be either putobject, dupstring or dupchilledstring,
* depending on the combination of `--enable-frozen-string-literal`,
* `# frozen_string_literal: true`, and whether or not there is interpolation.
*
@@ -5263,22 +5088,31 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte
* is necessary to indicate that the string should be left up to the runtime,
* which could potentially use a chilled string otherwise.
*/
-static inline void
-pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
+static PRISM_INLINE void
+pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
+ pm_arena_t *arena = parser->arena;
#define CLEAR_FLAGS(node) \
- node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
+ node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
#define MUTABLE_FLAGS(node) \
- node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
+ node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
- if (node->parts.size == 0 && node->opening_loc.start == NULL) {
- node->base.location.start = part->location.start;
+ if (node->parts.size == 0 && node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(node, part);
}
- node->base.location.end = MAX(node->base.location.end, part->location.end);
+ if (PM_NODE_END(part) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, part);
+ }
switch (PM_NODE_TYPE(part)) {
case PM_STRING_NODE:
+ // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
+ // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
+ // as long as this interpolation only consists of other string literals.
+ if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
+ pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
+ }
part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
break;
case PM_INTERPOLATED_STRING_NODE:
@@ -5330,8 +5164,14 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
break;
case PM_X_STRING_NODE:
case PM_INTERPOLATED_X_STRING_NODE:
- // If this is an x string, then this is a syntax error. But we want
- // to handle it here so that we don't fail the assertion.
+ case PM_SYMBOL_NODE:
+ case PM_INTERPOLATED_SYMBOL_NODE:
+ // These will only happen in error cases. But we want to handle it
+ // here so that we don't fail the assertion.
+ CLEAR_FLAGS(node);
+ pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part)));
+ return;
+ case PM_ERROR_RECOVERY_NODE:
CLEAR_FLAGS(node);
break;
default:
@@ -5339,7 +5179,7 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
break;
}
- pm_node_list_append(&node->parts, part);
+ pm_node_list_append(arena, &node->parts, part);
#undef CLEAR_FLAGS
#undef MUTABLE_FLAGS
@@ -5350,7 +5190,6 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_
*/
static pm_interpolated_string_node_t *
pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
- pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
switch (parser->frozen_string_literal) {
@@ -5362,25 +5201,23 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
break;
}
- *node = (pm_interpolated_string_node_t) {
- {
- .type = PM_INTERPOLATED_STRING_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end,
- },
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .parts = { 0 }
- };
+ uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
+ uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
+
+ pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ NTOK2LOC(parser, closing)
+ );
if (parts != NULL) {
pm_node_t *part;
PM_NODE_LIST_FOREACH(parts, index, part) {
- pm_interpolated_string_node_append(node, part);
+ pm_interpolated_string_node_append(parser, node, part);
}
}
@@ -5391,25 +5228,28 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin
* Set the closing token of the given InterpolatedStringNode node.
*/
static void
-pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
+pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
}
static void
-pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
- if (node->parts.size == 0 && node->opening_loc.start == NULL) {
- node->base.location.start = part->location.start;
+pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
+ if (node->parts.size == 0 && node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(node, part);
}
- pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
- node->base.location.end = MAX(node->base.location.end, part->location.end);
+ pm_interpolated_node_append(arena, UP(node), &node->parts, part);
+
+ if (PM_NODE_END(part) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, part);
+ }
}
static void
-pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
+pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
}
/**
@@ -5417,27 +5257,23 @@ pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node,
*/
static pm_interpolated_symbol_node_t *
pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
- pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
-
- *node = (pm_interpolated_symbol_node_t) {
- {
- .type = PM_INTERPOLATED_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end,
- },
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .parts = { 0 }
- };
+ uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
+ uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
+
+ pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ NTOK2LOC(parser, closing)
+ );
if (parts != NULL) {
pm_node_t *part;
PM_NODE_LIST_FOREACH(parts, index, part) {
- pm_interpolated_symbol_node_append(node, part);
+ pm_interpolated_symbol_node_append(parser->arena, node, part);
}
}
@@ -5449,35 +5285,27 @@ pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *openin
*/
static pm_interpolated_x_string_node_t *
pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
-
- *node = (pm_interpolated_x_string_node_t) {
- {
- .type = PM_INTERPOLATED_X_STRING_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .parts = { 0 }
- };
-
- return node;
+ return pm_interpolated_x_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, closing)
+ );
}
-static inline void
-pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
- pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
- node->base.location.end = part->location.end;
+static PRISM_INLINE void
+pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
+ pm_interpolated_node_append(arena, UP(node), &node->parts, part);
+ PM_NODE_LENGTH_SET_NODE(node, part);
}
-static inline void
-pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
- node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
- node->base.location.end = closing->end;
+static PRISM_INLINE void
+pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
+ node->closing_loc = TOK2LOC(parser, closing);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
}
/**
@@ -5485,17 +5313,12 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node,
*/
static pm_it_local_variable_read_node_t *
pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
-
- *node = (pm_it_local_variable_read_node_t) {
- {
- .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name)
- }
- };
-
- return node;
+ return pm_it_local_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name)
+ );
}
/**
@@ -5503,20 +5326,12 @@ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *nam
*/
static pm_it_parameters_node_t *
pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
- pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
-
- *node = (pm_it_parameters_node_t) {
- {
- .type = PM_IT_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- }
- };
-
- return node;
+ return pm_it_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing)
+ );
}
/**
@@ -5524,37 +5339,31 @@ pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, con
*/
static pm_keyword_hash_node_t *
pm_keyword_hash_node_create(pm_parser_t *parser) {
- pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
-
- *node = (pm_keyword_hash_node_t) {
- .base = {
- .type = PM_KEYWORD_HASH_NODE,
- .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- },
- .elements = { 0 }
- };
-
- return node;
+ return pm_keyword_hash_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
* Append an element to a KeywordHashNode node.
*/
static void
-pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
+pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
// If the element being added is not an AssocNode or does not have a symbol
// key, then we want to turn the SYMBOL_KEYS flag off.
if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
- pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
+ pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
}
- pm_node_list_append(&hash->elements, element);
- if (hash->base.location.start == NULL) {
- hash->base.location.start = element->location.start;
+ pm_node_list_append(arena, &hash->elements, element);
+ if (PM_NODE_LENGTH(hash) == 0) {
+ PM_NODE_START_SET_NODE(hash, element);
}
- hash->base.location.end = element->location.end;
+ PM_NODE_LENGTH_SET_NODE(hash, element);
}
/**
@@ -5562,22 +5371,14 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el
*/
static pm_required_keyword_parameter_node_t *
pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
- pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
-
- *node = (pm_required_keyword_parameter_node_t) {
- {
- .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name->start,
- .end = name->end
- },
- },
- .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
- };
-
- return node;
+ return pm_required_keyword_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_parser_constant_id_raw(parser, name->start, name->end - 1),
+ TOK2LOC(parser, name)
+ );
}
/**
@@ -5585,23 +5386,15 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
*/
static pm_optional_keyword_parameter_node_t *
pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
- pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
-
- *node = (pm_optional_keyword_parameter_node_t) {
- {
- .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name->start,
- .end = value->location.end
- },
- },
- .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
- .value = value
- };
-
- return node;
+ return pm_optional_keyword_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
+ pm_parser_constant_id_raw(parser, name->start, name->end - 1),
+ TOK2LOC(parser, name),
+ value
+ );
}
/**
@@ -5609,23 +5402,15 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t
*/
static pm_keyword_rest_parameter_node_t *
pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
- pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
-
- *node = (pm_keyword_rest_parameter_node_t) {
- {
- .type = PM_KEYWORD_REST_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- },
- },
- .name = pm_parser_optional_constant_id_token(parser, name),
- .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_keyword_rest_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+ name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+ NTOK2LOC(parser, name),
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5641,26 +5426,18 @@ pm_lambda_node_create(
pm_node_t *parameters,
pm_node_t *body
) {
- pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
-
- *node = (pm_lambda_node_t) {
- {
- .type = PM_LAMBDA_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = closing->end
- },
- },
- .locals = *locals,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .parameters = parameters,
- .body = body
- };
-
- return node;
+ return pm_lambda_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, closing),
+ *locals,
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing),
+ parameters,
+ body
+ );
}
/**
@@ -5670,25 +5447,18 @@ static pm_local_variable_and_write_node_t *
pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
- pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
-
- *node = (pm_local_variable_and_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .name = name,
- .depth = depth
- };
- return node;
+ return pm_local_variable_and_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ name,
+ depth
+ );
}
/**
@@ -5696,26 +5466,18 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target,
*/
static pm_local_variable_operator_write_node_t *
pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
- pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
-
- *node = (pm_local_variable_operator_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name_loc = target->location,
- .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .name = name,
- .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
- .depth = depth
- };
-
- return node;
+ return pm_local_variable_operator_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ name,
+ pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
+ depth
+ );
}
/**
@@ -5725,25 +5487,18 @@ static pm_local_variable_or_write_node_t *
pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
- pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
- *node = (pm_local_variable_or_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->location.start,
- .end = value->location.end
- }
- },
- .name_loc = target->location,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value,
- .name = name,
- .depth = depth
- };
-
- return node;
+ return pm_local_variable_or_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(target, value),
+ target->location,
+ TOK2LOC(parser, operator),
+ value,
+ name,
+ depth
+ );
}
/**
@@ -5753,19 +5508,14 @@ static pm_local_variable_read_node_t *
pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
- pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
-
- *node = (pm_local_variable_read_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name)
- },
- .name = name_id,
- .depth = depth
- };
-
- return node;
+ return pm_local_variable_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ name_id,
+ depth
+ );
}
/**
@@ -5792,32 +5542,23 @@ pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t
*/
static pm_local_variable_write_node_t *
pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
- pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
-
- *node = (pm_local_variable_write_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name_loc->start,
- .end = value->location.end
- }
- },
- .name = name,
- .depth = depth,
- .value = value,
- .name_loc = *name_loc,
- .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_local_variable_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
+ name,
+ depth,
+ *name_loc,
+ value,
+ TOK2LOC(parser, operator)
+ );
}
/**
* Returns true if the given bounds comprise `it`.
*/
-static inline bool
+static PRISM_INLINE bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
}
@@ -5826,19 +5567,24 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) {
* Returns true if the given bounds comprise a numbered parameter (i.e., they
* are of the form /^_[1-9]$/; `_0` is not a numbered parameter).
*/
-static inline bool
-pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
- return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
+static PRISM_INLINE bool
+pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
+ return (
+ (length == 2) &&
+ (parser->start[start] == '_') &&
+ (parser->start[start + 1] != '0') &&
+ pm_char_is_decimal_digit(parser->start[start + 1])
+ );
}
/**
* Ensure the given bounds do not comprise a numbered parameter. If they do, add
* an appropriate error message to the parser.
*/
-static inline void
-pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- if (pm_token_is_numbered_parameter(start, end)) {
- PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
+static PRISM_INLINE void
+pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
+ if (pm_token_is_numbered_parameter(parser, start, length)) {
+ PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
}
}
@@ -5848,20 +5594,16 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui
*/
static pm_local_variable_target_node_t *
pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
- pm_refute_numbered_parameter(parser, location->start, location->end);
- pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
-
- *node = (pm_local_variable_target_node_t) {
- {
- .type = PM_LOCAL_VARIABLE_TARGET_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = *location
- },
- .name = name,
- .depth = depth
- };
-
- return node;
+ pm_refute_numbered_parameter(parser, location->start, location->length);
+
+ return pm_local_variable_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = location->start, .length = location->length }),
+ name,
+ depth
+ );
}
/**
@@ -5871,23 +5613,15 @@ static pm_match_predicate_node_t *
pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
pm_assert_value_expression(parser, value);
- pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
-
- *node = (pm_match_predicate_node_t) {
- {
- .type = PM_MATCH_PREDICATE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = value->location.start,
- .end = pattern->location.end
- }
- },
- .value = value,
- .pattern = pattern,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_match_predicate_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(value, pattern),
+ value,
+ pattern,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5897,23 +5631,15 @@ static pm_match_required_node_t *
pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
pm_assert_value_expression(parser, value);
- pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
-
- *node = (pm_match_required_node_t) {
- {
- .type = PM_MATCH_REQUIRED_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = value->location.start,
- .end = pattern->location.end
- }
- },
- .value = value,
- .pattern = pattern,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_match_required_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(value, pattern),
+ value,
+ pattern,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -5921,19 +5647,14 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *
*/
static pm_match_write_node_t *
pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
- pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
-
- *node = (pm_match_write_node_t) {
- {
- .type = PM_MATCH_WRITE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = call->base.location
- },
- .call = call,
- .targets = { 0 }
- };
-
- return node;
+ return pm_match_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(call),
+ call,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
@@ -5941,26 +5662,18 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
*/
static pm_module_node_t *
pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
-
- *node = (pm_module_node_t) {
- {
- .type = PM_MODULE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = module_keyword->start,
- .end = end_keyword->end
- }
- },
- .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
- .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
- .constant_path = constant_path,
- .body = body,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
- .name = pm_parser_constant_id_token(parser, name)
- };
-
- return node;
+ return pm_module_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
+ (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
+ TOK2LOC(parser, module_keyword),
+ constant_path,
+ body,
+ TOK2LOC(parser, end_keyword),
+ pm_parser_constant_id_token(parser, name)
+ );
}
/**
@@ -5968,22 +5681,17 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const
*/
static pm_multi_target_node_t *
pm_multi_target_node_create(pm_parser_t *parser) {
- pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
-
- *node = (pm_multi_target_node_t) {
- {
- .type = PM_MULTI_TARGET_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = NULL, .end = NULL }
- },
- .lefts = { 0 },
- .rest = NULL,
- .rights = { 0 },
- .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_multi_target_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -5996,27 +5704,27 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t
node->rest = target;
} else {
pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
- pm_node_list_append(&node->rights, target);
+ pm_node_list_append(parser->arena, &node->rights, target);
}
} else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
if (node->rest == NULL) {
node->rest = target;
} else {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
- pm_node_list_append(&node->rights, target);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
+ pm_node_list_append(parser->arena, &node->rights, target);
}
} else if (node->rest == NULL) {
- pm_node_list_append(&node->lefts, target);
+ pm_node_list_append(parser->arena, &node->lefts, target);
} else {
- pm_node_list_append(&node->rights, target);
+ pm_node_list_append(parser->arena, &node->rights, target);
}
- if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
- node->base.location.start = target->location.start;
+ if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
+ PM_NODE_START_SET_NODE(node, target);
}
- if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
- node->base.location.end = target->location.end;
+ if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
+ PM_NODE_LENGTH_SET_NODE(node, target);
}
}
@@ -6024,18 +5732,19 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t
* Set the opening of a MultiTargetNode node.
*/
static void
-pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
- node->base.location.start = lparen->start;
- node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
+pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
+ PM_NODE_START_SET_TOKEN(parser, node, lparen);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
+ node->lparen_loc = TOK2LOC(parser, lparen);
}
/**
* Set the closing of a MultiTargetNode node.
*/
static void
-pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
- node->base.location.end = rparen->end;
- node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
+pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
+ node->rparen_loc = TOK2LOC(parser, rparen);
}
/**
@@ -6043,32 +5752,21 @@ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t
*/
static pm_multi_write_node_t *
pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
- pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
-
- *node = (pm_multi_write_node_t) {
- {
- .type = PM_MULTI_WRITE_NODE,
- .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = target->base.location.start,
- .end = value->location.end
- }
- },
- .lefts = target->lefts,
- .rest = target->rest,
- .rights = target->rights,
- .lparen_loc = target->lparen_loc,
- .rparen_loc = target->rparen_loc,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- // Explicitly do not call pm_node_destroy here because we want to keep
- // around all of the information within the MultiWriteNode node.
- xfree(target);
-
- return node;
+ /* The target is no longer necessary because we have reused its children. It
+ * is arena-allocated so no explicit free is needed. */
+ return pm_multi_write_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
+ PM_LOCATION_INIT_NODES(target, value),
+ target->lefts,
+ target->rest,
+ target->rights,
+ target->lparen_loc,
+ target->rparen_loc,
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -6077,22 +5775,15 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target,
static pm_next_node_t *
pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
- pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
-
- *node = (pm_next_node_t) {
- {
- .type = PM_NEXT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .arguments = arguments
- };
- return node;
+ return pm_next_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+ arguments,
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -6101,16 +5792,31 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments
static pm_nil_node_t *
pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_NIL);
- pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
- *node = (pm_nil_node_t) {{
- .type = PM_NIL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
+ return pm_nil_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
+}
- return node;
+/**
+ * Allocate and initialize a new NoBlockParameterNode node.
+ */
+static pm_no_block_parameter_node_t *
+pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
+ assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
+ assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
+
+ return pm_no_block_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, keyword)
+ );
}
/**
@@ -6120,41 +5826,29 @@ static pm_no_keywords_parameter_node_t *
pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
- pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
-
- *node = (pm_no_keywords_parameter_node_t) {
- {
- .type = PM_NO_KEYWORDS_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = keyword->end
- }
- },
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
- };
- return node;
+ return pm_no_keywords_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, keyword)
+ );
}
/**
* Allocate and initialize a new NumberedParametersNode node.
*/
static pm_numbered_parameters_node_t *
-pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
- pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
-
- *node = (pm_numbered_parameters_node_t) {
- {
- .type = PM_NUMBERED_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = *location
- },
- .maximum = maximum
- };
-
- return node;
+pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
+ return pm_numbered_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ maximum
+ );
}
/**
@@ -6190,14 +5884,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
unsigned long value = strtoul(digits, &endptr, 10);
if ((digits == endptr) || (*endptr != '\0')) {
- pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+ pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
value = 0;
}
- xfree(digits);
+ xfree_sized(digits, sizeof(char) * (length + 1));
if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
- PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
+ PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
value = 0;
}
@@ -6212,18 +5906,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to
static pm_numbered_reference_read_node_t *
pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
- pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
- *node = (pm_numbered_reference_read_node_t) {
- {
- .type = PM_NUMBERED_REFERENCE_READ_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(name),
- },
- .number = pm_numbered_reference_read_node_number(parser, name)
- };
-
- return node;
+ return pm_numbered_reference_read_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, name),
+ pm_numbered_reference_read_node_number(parser, name)
+ );
}
/**
@@ -6231,24 +5921,16 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na
*/
static pm_optional_parameter_node_t *
pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
- pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
-
- *node = (pm_optional_parameter_node_t) {
- {
- .type = PM_OPTIONAL_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = name->start,
- .end = value->location.end
- }
- },
- .name = pm_parser_constant_id_token(parser, name),
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .value = value
- };
-
- return node;
+ return pm_optional_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
+ pm_parser_constant_id_token(parser, name),
+ TOK2LOC(parser, name),
+ TOK2LOC(parser, operator),
+ value
+ );
}
/**
@@ -6258,23 +5940,15 @@ static pm_or_node_t *
pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
- pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
-
- *node = (pm_or_node_t) {
- {
- .type = PM_OR_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = left->location.start,
- .end = right->location.end
- }
- },
- .left = left,
- .right = right,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_or_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(left, right),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6282,24 +5956,19 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat
*/
static pm_parameters_node_t *
pm_parameters_node_create(pm_parser_t *parser) {
- pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
-
- *node = (pm_parameters_node_t) {
- {
- .type = PM_PARAMETERS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
- },
- .rest = NULL,
- .keyword_rest = NULL,
- .block = NULL,
- .requireds = { 0 },
- .optionals = { 0 },
- .posts = { 0 },
- .keywords = { 0 }
- };
-
- return node;
+ return pm_parameters_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 }),
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ ((pm_node_list_t) { 0 }),
+ ((pm_node_list_t) { 0 }),
+ NULL,
+ NULL
+ );
}
/**
@@ -6307,16 +5976,12 @@ pm_parameters_node_create(pm_parser_t *parser) {
*/
static void
pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
- if (params->base.location.start == NULL) {
- params->base.location.start = param->location.start;
- } else {
- params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
+ if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
+ PM_NODE_START_SET_NODE(params, param);
}
- if (params->base.location.end == NULL) {
- params->base.location.end = param->location.end;
- } else {
- params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
+ if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
+ PM_NODE_LENGTH_SET_NODE(params, param);
}
}
@@ -6324,27 +5989,27 @@ pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param)
* Append a required parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
pm_parameters_node_location_set(params, param);
- pm_node_list_append(&params->requireds, param);
+ pm_node_list_append(arena, &params->requireds, param);
}
/**
* Append an optional parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
- pm_parameters_node_location_set(params, (pm_node_t *) param);
- pm_node_list_append(&params->optionals, (pm_node_t *) param);
+pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
+ pm_parameters_node_location_set(params, UP(param));
+ pm_node_list_append(arena, &params->optionals, UP(param));
}
/**
* Append a post optional arguments parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
pm_parameters_node_location_set(params, param);
- pm_node_list_append(&params->posts, param);
+ pm_node_list_append(arena, &params->posts, param);
}
/**
@@ -6360,9 +6025,9 @@ pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
* Append a keyword parameter to a ParametersNode node.
*/
static void
-pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
+pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
pm_parameters_node_location_set(params, param);
- pm_node_list_append(&params->keywords, param);
+ pm_node_list_append(arena, &params->keywords, param);
}
/**
@@ -6379,9 +6044,9 @@ pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *par
* Set the block parameter on a ParametersNode node.
*/
static void
-pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
+pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
assert(params->block == NULL);
- pm_parameters_node_location_set(params, (pm_node_t *) param);
+ pm_parameters_node_location_set(params, param);
params->block = param;
}
@@ -6390,22 +6055,14 @@ pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_no
*/
static pm_program_node_t *
pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
- pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
-
- *node = (pm_program_node_t) {
- {
- .type = PM_PROGRAM_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statements == NULL ? parser->start : statements->base.location.start,
- .end = statements == NULL ? parser->end : statements->base.location.end
- }
- },
- .locals = *locals,
- .statements = statements
- };
-
- return node;
+ return pm_program_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODE(statements),
+ *locals,
+ statements
+ );
}
/**
@@ -6413,24 +6070,15 @@ pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_st
*/
static pm_parentheses_node_t *
pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
- pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
-
- *node = (pm_parentheses_node_t) {
- {
- .type = PM_PARENTHESES_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
- .body = body,
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_parentheses_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ body,
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -6438,24 +6086,16 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no
*/
static pm_pinned_expression_node_t *
pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
- pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
-
- *node = (pm_pinned_expression_node_t) {
- {
- .type = PM_PINNED_EXPRESSION_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = rparen->end
- }
- },
- .expression = expression,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
- .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
- };
-
- return node;
+ return pm_pinned_expression_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
+ expression,
+ TOK2LOC(parser, operator),
+ TOK2LOC(parser, lparen),
+ TOK2LOC(parser, rparen)
+ );
}
/**
@@ -6463,22 +6103,14 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con
*/
static pm_pinned_variable_node_t *
pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
- pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
-
- *node = (pm_pinned_variable_node_t) {
- {
- .type = PM_PINNED_VARIABLE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = variable->location.end
- }
- },
- .variable = variable,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_pinned_variable_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
+ variable,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6486,24 +6118,16 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator,
*/
static pm_post_execution_node_t *
pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
-
- *node = (pm_post_execution_node_t) {
- {
- .type = PM_POST_EXECUTION_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end
- }
- },
- .statements = statements,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_post_execution_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ statements,
+ TOK2LOC(parser, keyword),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -6511,24 +6135,16 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co
*/
static pm_pre_execution_node_t *
pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
- pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
-
- *node = (pm_pre_execution_node_t) {
- {
- .type = PM_PRE_EXECUTION_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end
- }
- },
- .statements = statements,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
- };
-
- return node;
+ return pm_pre_execution_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ statements,
+ TOK2LOC(parser, keyword),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, closing)
+ );
}
/**
@@ -6538,8 +6154,6 @@ static pm_range_node_t *
pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
pm_assert_value_expression(parser, left);
pm_assert_value_expression(parser, right);
-
- pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
pm_node_flags_t flags = 0;
// Indicate that this node is an exclusive range if the operator is `...`.
@@ -6557,22 +6171,18 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
flags |= PM_NODE_FLAG_STATIC_LITERAL;
}
- *node = (pm_range_node_t) {
- {
- .type = PM_RANGE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = (left == NULL ? operator->start : left->location.start),
- .end = (right == NULL ? operator->end : right->location.end)
- }
- },
- .left = left,
- .right = right,
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
+ uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
+ uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
- return node;
+ return pm_range_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ left,
+ right,
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6581,15 +6191,13 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
static pm_redo_node_t *
pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_REDO);
- pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
- *node = (pm_redo_node_t) {{
- .type = PM_REDO_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_redo_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6598,31 +6206,22 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_regular_expression_node_t *
pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
-
- *node = (pm_regular_expression_node_t) {
- {
- .type = PM_REGULAR_EXPRESSION_NODE,
- .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = MIN(opening->start, closing->start),
- .end = MAX(opening->end, closing->end)
- }
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .content_loc = PM_LOCATION_TOKEN_VALUE(content),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *unescaped
- };
-
- return node;
+ return pm_regular_expression_node_new(
+ parser->arena,
+ ++parser->node_id,
+ pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, content),
+ TOK2LOC(parser, closing),
+ *unescaped
+ );
}
/**
* Allocate a new initialize a new RegularExpressionNode node.
*/
-static inline pm_regular_expression_node_t *
+static PRISM_INLINE pm_regular_expression_node_t *
pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
}
@@ -6632,18 +6231,13 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
*/
static pm_required_parameter_node_t *
pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
- pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
-
- *node = (pm_required_parameter_node_t) {
- {
- .type = PM_REQUIRED_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- },
- .name = pm_parser_constant_id_token(parser, token)
- };
-
- return node;
+ return pm_required_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ pm_parser_constant_id_token(parser, token)
+ );
}
/**
@@ -6651,23 +6245,15 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token)
*/
static pm_rescue_modifier_node_t *
pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
- pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
-
- *node = (pm_rescue_modifier_node_t) {
- {
- .type = PM_RESCUE_MODIFIER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = expression->location.start,
- .end = rescue_expression->location.end
- }
- },
- .expression = expression,
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .rescue_expression = rescue_expression
- };
-
- return node;
+ return pm_rescue_modifier_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_NODES(expression, rescue_expression),
+ expression,
+ TOK2LOC(parser, keyword),
+ rescue_expression
+ );
}
/**
@@ -6675,29 +6261,24 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const
*/
static pm_rescue_node_t *
pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
- pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
-
- *node = (pm_rescue_node_t) {
- {
- .type = PM_RESCUE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(keyword)
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .reference = NULL,
- .statements = NULL,
- .subsequent = NULL,
- .exceptions = { 0 }
- };
-
- return node;
+ return pm_rescue_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, keyword),
+ TOK2LOC(parser, keyword),
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL,
+ ((pm_location_t) { 0 }),
+ NULL,
+ NULL
+ );
}
-static inline void
-pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
- node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
+static PRISM_INLINE void
+pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
+ node->operator_loc = TOK2LOC(parser, operator);
}
/**
@@ -6706,7 +6287,7 @@ pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator)
static void
pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
node->reference = reference;
- node->base.location.end = reference->location.end;
+ PM_NODE_LENGTH_SET_NODE(node, reference);
}
/**
@@ -6716,7 +6297,7 @@ static void
pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
node->statements = statements;
if (pm_statements_node_body_length(statements) > 0) {
- node->base.location.end = statements->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, statements);
}
}
@@ -6726,16 +6307,16 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat
static void
pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
node->subsequent = subsequent;
- node->base.location.end = subsequent->base.location.end;
+ PM_NODE_LENGTH_SET_NODE(node, subsequent);
}
/**
* Append an exception node to a rescue node, and update the location.
*/
static void
-pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
- pm_node_list_append(&node->exceptions, exception);
- node->base.location.end = exception->location.end;
+pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
+ pm_node_list_append(arena, &node->exceptions, exception);
+ PM_NODE_LENGTH_SET_NODE(node, exception);
}
/**
@@ -6743,23 +6324,15 @@ pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
*/
static pm_rest_parameter_node_t *
pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
- pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
-
- *node = (pm_rest_parameter_node_t) {
- {
- .type = PM_REST_PARAMETER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
- }
- },
- .name = pm_parser_optional_constant_id_token(parser, name),
- .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
- };
-
- return node;
+ return pm_rest_parameter_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
+ name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
+ NTOK2LOC(parser, name),
+ TOK2LOC(parser, operator)
+ );
}
/**
@@ -6768,15 +6341,13 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c
static pm_retry_node_t *
pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_RETRY);
- pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
- *node = (pm_retry_node_t) {{
- .type = PM_RETRY_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_retry_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6784,22 +6355,14 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_return_node_t *
pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
- pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
-
- *node = (pm_return_node_t) {
- {
- .type = PM_RETURN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .arguments = arguments
- };
-
- return node;
+ return pm_return_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
+ TOK2LOC(parser, keyword),
+ arguments
+ );
}
/**
@@ -6808,15 +6371,13 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen
static pm_self_node_t *
pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_SELF);
- pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
- *node = (pm_self_node_t) {{
- .type = PM_SELF_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_self_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6824,19 +6385,13 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_shareable_constant_node_t *
pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
- pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
-
- *node = (pm_shareable_constant_node_t) {
- {
- .type = PM_SHAREABLE_CONSTANT_NODE,
- .flags = (pm_node_flags_t) value,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NODE_VALUE(write)
- },
- .write = write
- };
-
- return node;
+ return pm_shareable_constant_node_new(
+ parser->arena,
+ ++parser->node_id,
+ (pm_node_flags_t) value,
+ PM_LOCATION_INIT_NODE(write),
+ write
+ );
}
/**
@@ -6844,26 +6399,18 @@ pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shar
*/
static pm_singleton_class_node_t *
pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
- pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
-
- *node = (pm_singleton_class_node_t) {
- {
- .type = PM_SINGLETON_CLASS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = class_keyword->start,
- .end = end_keyword->end
- }
- },
- .locals = *locals,
- .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .expression = expression,
- .body = body,
- .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
- };
-
- return node;
+ return pm_singleton_class_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
+ *locals,
+ TOK2LOC(parser, class_keyword),
+ TOK2LOC(parser, operator),
+ expression,
+ body,
+ TOK2LOC(parser, end_keyword)
+ );
}
/**
@@ -6872,16 +6419,13 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local
static pm_source_encoding_node_t *
pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
- pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
-
- *node = (pm_source_encoding_node_t) {{
- .type = PM_SOURCE_ENCODING_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_source_encoding_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6889,7 +6433,6 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_source_file_node_t*
pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
- pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
pm_node_flags_t flags = 0;
@@ -6903,17 +6446,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
break;
}
- *node = (pm_source_file_node_t) {
- {
- .type = PM_SOURCE_FILE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
- },
- .filepath = parser->filepath
- };
-
- return node;
+ return pm_source_file_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKEN(parser, file_keyword),
+ parser->filepath
+ );
}
/**
@@ -6922,16 +6461,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
static pm_source_line_node_t *
pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD___LINE__);
- pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
- *node = (pm_source_line_node_t) {{
- .type = PM_SOURCE_LINE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
-
- return node;
+ return pm_source_line_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -6939,22 +6475,14 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_splat_node_t *
pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
- pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
-
- *node = (pm_splat_node_t) {
- {
- .type = PM_SPLAT_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = operator->start,
- .end = (expression == NULL ? operator->end : expression->location.end)
- }
- },
- .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
- .expression = expression
- };
-
- return node;
+ return pm_splat_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
+ TOK2LOC(parser, operator),
+ expression
+ );
}
/**
@@ -6962,18 +6490,13 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t
*/
static pm_statements_node_t *
pm_statements_node_create(pm_parser_t *parser) {
- pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
-
- *node = (pm_statements_node_t) {
- {
- .type = PM_STATEMENTS_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .body = { 0 }
- };
-
- return node;
+ return pm_statements_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_node_list_t) { 0 })
+ );
}
/**
@@ -6985,25 +6508,17 @@ pm_statements_node_body_length(pm_statements_node_t *node) {
}
/**
- * Set the location of the given StatementsNode.
- */
-static void
-pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
- node->base.location = (pm_location_t) { .start = start, .end = end };
-}
-
-/**
* Update the location of the statements node based on the statement that is
* being added to the list.
*/
-static inline void
+static PRISM_INLINE void
pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
- if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
- node->base.location.start = statement->location.start;
+ if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
+ PM_NODE_START_SET_NODE(node, statement);
}
- if (statement->location.end > node->base.location.end) {
- node->base.location.end = statement->location.end;
+ if (PM_NODE_END(statement) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, statement);
}
}
@@ -7030,7 +6545,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
}
}
- pm_node_list_append(&node->body, statement);
+ pm_node_list_append(parser->arena, &node->body, statement);
if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
}
@@ -7038,18 +6553,17 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node,
* Prepend a new node to the given StatementsNode node's body.
*/
static void
-pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
+pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
pm_statements_node_body_update(node, statement);
- pm_node_list_prepend(&node->body, statement);
+ pm_node_list_prepend(arena, &node->body, statement);
pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
}
/**
* Allocate a new StringNode node with the current string on the parser.
*/
-static inline pm_string_node_t *
+static PRISM_INLINE pm_string_node_t *
pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
- pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
switch (parser->frozen_string_literal) {
@@ -7061,23 +6575,19 @@ pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening,
break;
}
- *node = (pm_string_node_t) {
- {
- .type = PM_STRING_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
- .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
- }
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .content_loc = PM_LOCATION_TOKEN_VALUE(content),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *string
- };
+ uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
+ uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
- return node;
+ return pm_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ TOK2LOC(parser, content),
+ NTOK2LOC(parser, closing),
+ *string
+ );
}
/**
@@ -7105,30 +6615,21 @@ pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
static pm_super_node_t *
pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
- pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
-
- const uint8_t *end = pm_arguments_end(arguments);
- if (end == NULL) {
- assert(false && "unreachable");
- }
- *node = (pm_super_node_t) {
- {
- .type = PM_SUPER_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = end,
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .lparen_loc = arguments->opening_loc,
- .arguments = arguments->arguments,
- .rparen_loc = arguments->closing_loc,
- .block = arguments->block
- };
-
- return node;
+ const pm_location_t *end = pm_arguments_end(arguments);
+ assert(end != NULL && "unreachable");
+
+ return pm_super_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
+ TOK2LOC(parser, keyword),
+ arguments->opening_loc,
+ arguments->arguments,
+ arguments->closing_loc,
+ arguments->block
+ );
}
/**
@@ -7156,7 +6657,7 @@ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *locat
size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
if (width == 0) {
- pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
break;
}
@@ -7176,7 +6677,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca
size_t width = encoding->char_width(cursor, end - cursor);
if (width == 0) {
- pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
break;
}
@@ -7193,7 +6694,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca
* If the validate flag is set, then it will check the contents of the symbol
* to ensure that all characters are valid in the encoding.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
if (parser->explicit_encoding != NULL) {
// A Symbol may optionally have its encoding explicitly set. This will
@@ -7218,160 +6719,31 @@ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_
return 0;
}
-static pm_node_flags_t
-parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
- assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
- (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
- (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
- (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
-
- // There's special validation logic used if a string does not contain any character escape sequences.
- if (parser->explicit_encoding == NULL) {
- // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
- // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
- // the US-ASCII encoding.
- if (ascii_only) {
- return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
- }
-
- if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
- if (!ascii_only) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
- }
- } else if (parser->encoding != modifier_encoding) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
-
- if (modifier == 'n' && !ascii_only) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
- }
- }
-
- return flags;
- }
-
- // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
- bool mixed_encoding = false;
-
- if (mixed_encoding) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
- } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
- // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
- bool valid_string_in_modifier_encoding = true;
-
- if (!valid_string_in_modifier_encoding) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
- }
- } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
- // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
- if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
- }
- }
-
- // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
- return flags;
-}
-
-/**
- * Ruby "downgrades" the encoding of Regexps to US-ASCII if the associated encoding is ASCII-compatible and
- * the unescaped representation of a Regexp source consists only of US-ASCII code points. This is true even
- * when the Regexp is explicitly given an ASCII-8BIT encoding via the (/n) modifier. Otherwise, the encoding
- * may be explicitly set with an escape sequence.
- */
-static pm_node_flags_t
-parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
- // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
- bool valid_unicode_range = true;
- if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
- return flags;
- }
-
- // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
- // to multi-byte characters are allowed.
- if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
- // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
- // following error message appearing twice. We do the same for compatibility.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
- }
-
- /**
- * Start checking modifier flags. We need to process these before considering any explicit encodings that may have
- * been set by character literals. The order in which the encoding modifiers is checked does not matter. In the
- * event that both an encoding modifier and an explicit encoding would result in the same encoding we do not set
- * the corresponding "forced_<encoding>" flag. Instead, the caller should check the encoding modifier flag and
- * determine the encoding that way.
- */
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
- }
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
- }
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
- }
-
- if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
- return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
- }
-
- // At this point no encoding modifiers will be present on the regular expression as they would have already
- // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
- // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
- if (ascii_only) {
- return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
- }
-
- // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
- // or by specifying a modifier.
- //
- // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
- if (parser->explicit_encoding != NULL) {
- if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
- return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
- } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
- return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
- }
- }
-
- return 0;
-}
-
/**
* Allocate and initialize a new SymbolNode node with the given unescaped
* string.
*/
static pm_symbol_node_t *
pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
- pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
-
- *node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
- .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
- }
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .value_loc = PM_LOCATION_TOKEN_VALUE(value),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *unescaped
- };
-
- return node;
+ uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
+ uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
+
+ return pm_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL | flags,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ NTOK2LOC(parser, opening),
+ NTOK2LOC(parser, value),
+ NTOK2LOC(parser, closing),
+ *unescaped
+ );
}
/**
* Allocate and initialize a new SymbolNode node.
*/
-static inline pm_symbol_node_t *
+static PRISM_INLINE pm_symbol_node_t *
pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
}
@@ -7391,35 +6763,15 @@ pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *open
*/
static pm_symbol_node_t *
pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
- pm_symbol_node_t *node;
-
- switch (token->type) {
- case PM_TOKEN_LABEL: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
+ assert(token->type == PM_TOKEN_LABEL);
- pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
- node = pm_symbol_node_create(parser, &opening, &label, &closing);
+ pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
+ pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
+ pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
- assert((label.end - label.start) >= 0);
- pm_string_shared_init(&node->unescaped, label.start, label.end);
- pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
-
- break;
- }
- case PM_TOKEN_MISSING: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
- node = pm_symbol_node_create(parser, &opening, &label, &closing);
- break;
- }
- default:
- assert(false && "unreachable");
- node = NULL;
- break;
- }
+ assert((label.end - label.start) >= 0);
+ pm_string_shared_init(&node->unescaped, label.start, label.end);
+ pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
return node;
}
@@ -7429,18 +6781,16 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_symbol_node_t *
pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
- pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
-
- *node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .value_loc = PM_LOCATION_NULL_VALUE(parser),
- .unescaped = { 0 }
- };
+ pm_symbol_node_t *node = pm_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_string_t) { 0 })
+ );
pm_string_constant_init(&node->unescaped, content, strlen(content));
return node;
@@ -7450,21 +6800,29 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
* Check if the given node is a label in a hash.
*/
static bool
-pm_symbol_node_label_p(pm_node_t *node) {
- const uint8_t *end = NULL;
+pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
+ const pm_location_t *location = NULL;
switch (PM_NODE_TYPE(node)) {
- case PM_SYMBOL_NODE:
- end = ((pm_symbol_node_t *) node)->closing_loc.end;
+ case PM_SYMBOL_NODE: {
+ const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
+ if (cast->closing_loc.length > 0) {
+ location = &cast->closing_loc;
+ }
break;
- case PM_INTERPOLATED_SYMBOL_NODE:
- end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
+ }
+ case PM_INTERPOLATED_SYMBOL_NODE: {
+ const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
+ if (cast->closing_loc.length > 0) {
+ location = &cast->closing_loc;
+ }
break;
+ }
default:
return false;
}
- return (end != NULL) && (end[-1] == ':');
+ return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
}
/**
@@ -7472,32 +6830,26 @@ pm_symbol_node_label_p(pm_node_t *node) {
*/
static pm_symbol_node_t *
pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
- pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
+ pm_symbol_node_t *new_node = pm_symbol_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ node->content_loc,
+ TOK2LOC(parser, closing),
+ node->unescaped
+ );
- *new_node = (pm_symbol_node_t) {
- {
- .type = PM_SYMBOL_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- }
- },
- .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
- .value_loc = node->content_loc,
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .unescaped = node->unescaped
+ pm_token_t content = {
+ .type = PM_TOKEN_IDENTIFIER,
+ .start = parser->start + node->content_loc.start,
+ .end = parser->start + node->content_loc.start + node->content_loc.length
};
- pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
- pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
-
- // We are explicitly _not_ using pm_node_destroy here because we don't want
- // to trash the unescaped string. We could instead copy the string if we
- // know that it is owned, but we're taking the fast path for now.
- xfree(node);
+ pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
+ /* The old node is arena-allocated so no explicit free is needed. */
return new_node;
}
@@ -7506,7 +6858,6 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const
*/
static pm_string_node_t *
pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
- pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
pm_node_flags_t flags = 0;
switch (parser->frozen_string_literal) {
@@ -7518,24 +6869,18 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
break;
}
- *new_node = (pm_string_node_t) {
- {
- .type = PM_STRING_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = node->base.location
- },
- .opening_loc = node->opening_loc,
- .content_loc = node->value_loc,
- .closing_loc = node->closing_loc,
- .unescaped = node->unescaped
- };
-
- // We are explicitly _not_ using pm_node_destroy here because we don't want
- // to trash the unescaped string. We could instead copy the string if we
- // know that it is owned, but we're taking the fast path for now.
- xfree(node);
+ pm_string_node_t *new_node = pm_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_NODE(node),
+ node->opening_loc,
+ node->value_loc,
+ node->closing_loc,
+ node->unescaped
+ );
+ /* The old node is arena-allocated so no explicit free is needed. */
return new_node;
}
@@ -7545,16 +6890,13 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
static pm_true_node_t *
pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_TRUE);
- pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
-
- *node = (pm_true_node_t) {{
- .type = PM_TRUE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token)
- }};
- return node;
+ return pm_true_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_TOKEN(parser, token)
+ );
}
/**
@@ -7562,16 +6904,12 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
*/
static pm_true_node_t *
pm_true_node_synthesized_create(pm_parser_t *parser) {
- pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
-
- *node = (pm_true_node_t) {{
- .type = PM_TRUE_NODE,
- .flags = PM_NODE_FLAG_STATIC_LITERAL,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = { .start = parser->start, .end = parser->end }
- }};
-
- return node;
+ return pm_true_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_STATIC_LITERAL,
+ PM_LOCATION_INIT_UNSET
+ );
}
/**
@@ -7580,28 +6918,24 @@ pm_true_node_synthesized_create(pm_parser_t *parser) {
static pm_undef_node_t *
pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
- pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
-
- *node = (pm_undef_node_t) {
- {
- .type = PM_UNDEF_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_TOKEN_VALUE(token),
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
- .names = { 0 }
- };
- return node;
+ return pm_undef_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, token),
+ ((pm_node_list_t) { 0 }),
+ TOK2LOC(parser, token)
+ );
}
/**
* Append a name to an undef node.
*/
static void
-pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
- node->base.location.end = name->location.end;
- pm_node_list_append(&node->names, name);
+pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
+ PM_NODE_LENGTH_SET_NODE(node, name);
+ pm_node_list_append(arena, &node->names, name);
}
/**
@@ -7610,34 +6944,20 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
static pm_unless_node_t *
pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
-
- const uint8_t *end;
- if (statements != NULL) {
- end = statements->base.location.end;
- } else {
- end = predicate->location.end;
- }
-
- *node = (pm_unless_node_t) {
- {
- .type = PM_UNLESS_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
- .statements = statements,
- .else_clause = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ pm_node_t *end = statements == NULL ? predicate : UP(statements);
+
+ return pm_unless_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
+ TOK2LOC(parser, keyword),
+ predicate,
+ NTOK2LOC(parser, then_keyword),
+ statements,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
/**
@@ -7646,36 +6966,28 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t
static pm_unless_node_t *
pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, statement, true);
- *node = (pm_unless_node_t) {
- {
- .type = PM_UNLESS_NODE,
- .flags = PM_NODE_FLAG_NEWLINE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statement->location.start,
- .end = predicate->location.end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
- .predicate = predicate,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .statements = statements,
- .else_clause = NULL,
- .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
- };
-
- return node;
+ return pm_unless_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_NODE_FLAG_NEWLINE,
+ PM_LOCATION_INIT_NODES(statement, predicate),
+ TOK2LOC(parser, unless_keyword),
+ predicate,
+ ((pm_location_t) { 0 }),
+ statements,
+ NULL,
+ ((pm_location_t) { 0 })
+ );
}
-static inline void
-pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
- node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
- node->base.location.end = end_keyword->end;
+static PRISM_INLINE void
+pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
+ node->end_keyword_loc = TOK2LOC(parser, end_keyword);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
}
/**
@@ -7690,7 +7002,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
// All of the block exits that we want to remove should be within the
// statements, and since we are modifying the statements, we shouldn't have
// to check the end location.
- const uint8_t *start = statements->base.location.start;
+ uint32_t start = statements->base.location.start;
for (size_t index = parser->current_block_exits->size; index > 0; index--) {
pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
@@ -7706,27 +7018,19 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
*/
static pm_until_node_t *
pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- *node = (pm_until_node_t) {
- {
- .type = PM_UNTIL_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end,
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_until_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ TOK2LOC(parser, keyword),
+ NTOK2LOC(parser, do_keyword),
+ TOK2LOC(parser, closing),
+ predicate,
+ statements
+ );
}
/**
@@ -7734,28 +7038,20 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
*/
static pm_until_node_t *
pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
pm_loop_modifier_block_exits(parser, statements);
- *node = (pm_until_node_t) {
- {
- .type = PM_UNTIL_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statements->base.location.start,
- .end = predicate->location.end,
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_until_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_NODES(statements, predicate),
+ TOK2LOC(parser, keyword),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ predicate,
+ statements
+ );
}
/**
@@ -7763,42 +7059,34 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
*/
static pm_when_node_t *
pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
- pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
-
- *node = (pm_when_node_t) {
- {
- .type = PM_WHEN_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = NULL
- }
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .statements = NULL,
- .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .conditions = { 0 }
- };
-
- return node;
+ return pm_when_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_TOKEN(parser, keyword),
+ TOK2LOC(parser, keyword),
+ ((pm_node_list_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ NULL
+ );
}
/**
* Append a new condition to a when node.
*/
static void
-pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
- node->base.location.end = condition->location.end;
- pm_node_list_append(&node->conditions, condition);
+pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
+ PM_NODE_LENGTH_SET_NODE(node, condition);
+ pm_node_list_append(arena, &node->conditions, condition);
}
/**
* Set the location of the then keyword of a when node.
*/
-static inline void
-pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
- node->base.location.end = then_keyword->end;
- node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
+static PRISM_INLINE void
+pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
+ node->then_keyword_loc = TOK2LOC(parser, then_keyword);
}
/**
@@ -7806,8 +7094,8 @@ pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_k
*/
static void
pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
- if (statements->base.location.end > node->base.location.end) {
- node->base.location.end = statements->base.location.end;
+ if (PM_NODE_END(statements) > PM_NODE_END(node)) {
+ PM_NODE_LENGTH_SET_NODE(node, statements);
}
node->statements = statements;
@@ -7818,27 +7106,19 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
*/
static pm_while_node_t *
pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
- *node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = closing->end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
- .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_while_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
+ TOK2LOC(parser, keyword),
+ NTOK2LOC(parser, do_keyword),
+ TOK2LOC(parser, closing),
+ predicate,
+ statements
+ );
}
/**
@@ -7846,28 +7126,20 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
*/
static pm_while_node_t *
pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
- pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
pm_loop_modifier_block_exits(parser, statements);
- *node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .flags = flags,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = statements->base.location.start,
- .end = predicate->location.end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_while_node_new(
+ parser->arena,
+ ++parser->node_id,
+ flags,
+ PM_LOCATION_INIT_NODES(statements, predicate),
+ TOK2LOC(parser, keyword),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ predicate,
+ statements
+ );
}
/**
@@ -7875,22 +7147,17 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
*/
static pm_while_node_t *
pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
- pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
-
- *node = (pm_while_node_t) {
- {
- .type = PM_WHILE_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = PM_LOCATION_NULL_VALUE(parser)
- },
- .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
- .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
- .closing_loc = PM_LOCATION_NULL_VALUE(parser),
- .predicate = predicate,
- .statements = statements
- };
-
- return node;
+ return pm_while_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ PM_LOCATION_INIT_UNSET,
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ ((pm_location_t) { 0 }),
+ predicate,
+ statements
+ );
}
/**
@@ -7899,31 +7166,22 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
*/
static pm_x_string_node_t *
pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
- pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
-
- *node = (pm_x_string_node_t) {
- {
- .type = PM_X_STRING_NODE,
- .flags = PM_STRING_FLAGS_FROZEN,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = opening->start,
- .end = closing->end
- },
- },
- .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
- .content_loc = PM_LOCATION_TOKEN_VALUE(content),
- .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
- .unescaped = *unescaped
- };
-
- return node;
+ return pm_x_string_node_new(
+ parser->arena,
+ ++parser->node_id,
+ PM_STRING_FLAGS_FROZEN,
+ PM_LOCATION_INIT_TOKENS(parser, opening, closing),
+ TOK2LOC(parser, opening),
+ TOK2LOC(parser, content),
+ TOK2LOC(parser, closing),
+ *unescaped
+ );
}
/**
* Allocate and initialize a new XStringNode node.
*/
-static inline pm_x_string_node_t *
+static PRISM_INLINE pm_x_string_node_t *
pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
}
@@ -7933,40 +7191,31 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
*/
static pm_yield_node_t *
pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
- pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
+ uint32_t start = PM_TOKEN_START(parser, keyword);
+ uint32_t end;
- const uint8_t *end;
- if (rparen_loc->start != NULL) {
- end = rparen_loc->end;
+ if (rparen_loc->length > 0) {
+ end = PM_LOCATION_END(rparen_loc);
} else if (arguments != NULL) {
- end = arguments->base.location.end;
- } else if (lparen_loc->start != NULL) {
- end = lparen_loc->end;
+ end = PM_NODE_END(arguments);
+ } else if (lparen_loc->length > 0) {
+ end = PM_LOCATION_END(lparen_loc);
} else {
- end = keyword->end;
- }
-
- *node = (pm_yield_node_t) {
- {
- .type = PM_YIELD_NODE,
- .node_id = PM_NODE_IDENTIFY(parser),
- .location = {
- .start = keyword->start,
- .end = end
- },
- },
- .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
- .lparen_loc = *lparen_loc,
- .arguments = arguments,
- .rparen_loc = *rparen_loc
- };
-
- return node;
+ end = PM_TOKEN_END(parser, keyword);
+ }
+
+ return pm_yield_node_new(
+ parser->arena,
+ ++parser->node_id,
+ 0,
+ ((pm_location_t) { .start = start, .length = U32(end - start) }),
+ TOK2LOC(parser, keyword),
+ *lparen_loc,
+ arguments,
+ *rparen_loc
+ );
}
-#undef PM_NODE_ALLOC
-#undef PM_NODE_IDENTIFY
-
/**
* Check if any of the currently visible scopes contain a local variable
* described by the given constant id.
@@ -7992,7 +7241,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
* described by the given token. This function implicitly inserts a constant
* into the constant pool.
*/
-static inline int
+static PRISM_INLINE int
pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
}
@@ -8000,27 +7249,35 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
/**
* Add a constant id to the local table of the current scope.
*/
-static inline void
+static PRISM_INLINE void
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
- pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
+ pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
}
/**
* Add a local variable from a location to the current scope.
*/
static pm_constant_id_t
-pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
+pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
+ pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
return constant_id;
}
/**
+ * Add a local variable from a location to the current scope.
+ */
+static PRISM_INLINE pm_constant_id_t
+pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
+ return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
+}
+
+/**
* Add a local variable from a token to the current scope.
*/
-static inline pm_constant_id_t
+static PRISM_INLINE pm_constant_id_t
pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
- return pm_parser_local_add_location(parser, token->start, token->end, reads);
+ return pm_parser_local_add_raw(parser, token->start, token->end, reads);
}
/**
@@ -8054,7 +7311,7 @@ static bool
pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
// We want to check whether the parameter name is a numbered parameter or
// not.
- pm_refute_numbered_parameter(parser, name->start, name->end);
+ pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
// Otherwise we'll fetch the constant id for the parameter name and check
// whether it's already in the current scope.
@@ -8078,8 +7335,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
pm_scope_t *scope = parser->current_scope;
parser->current_scope = scope->previous;
pm_locals_free(&scope->locals);
- pm_node_list_free(&scope->implicit_parameters);
- xfree(scope);
+ xfree_sized(scope, sizeof(pm_scope_t));
}
/******************************************************************************/
@@ -8089,7 +7345,7 @@ pm_parser_scope_pop(pm_parser_t *parser) {
/**
* Pushes a value onto the stack.
*/
-static inline void
+static PRISM_INLINE void
pm_state_stack_push(pm_state_stack_t *stack, bool value) {
*stack = (*stack << 1) | (value & 1);
}
@@ -8097,7 +7353,7 @@ pm_state_stack_push(pm_state_stack_t *stack, bool value) {
/**
* Pops a value off the stack.
*/
-static inline void
+static PRISM_INLINE void
pm_state_stack_pop(pm_state_stack_t *stack) {
*stack >>= 1;
}
@@ -8105,38 +7361,38 @@ pm_state_stack_pop(pm_state_stack_t *stack) {
/**
* Returns the value at the top of the stack.
*/
-static inline bool
+static PRISM_INLINE bool
pm_state_stack_p(const pm_state_stack_t *stack) {
return *stack & 1;
}
-static inline void
+static PRISM_INLINE void
pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
// Use the negation of the value to prevent stack overflow.
pm_state_stack_push(&parser->accepts_block_stack, !value);
}
-static inline void
+static PRISM_INLINE void
pm_accepts_block_stack_pop(pm_parser_t *parser) {
pm_state_stack_pop(&parser->accepts_block_stack);
}
-static inline bool
+static PRISM_INLINE bool
pm_accepts_block_stack_p(pm_parser_t *parser) {
return !pm_state_stack_p(&parser->accepts_block_stack);
}
-static inline void
+static PRISM_INLINE void
pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
pm_state_stack_push(&parser->do_loop_stack, value);
}
-static inline void
+static PRISM_INLINE void
pm_do_loop_stack_pop(pm_parser_t *parser) {
pm_state_stack_pop(&parser->do_loop_stack);
}
-static inline bool
+static PRISM_INLINE bool
pm_do_loop_stack_p(pm_parser_t *parser) {
return pm_state_stack_p(&parser->do_loop_stack);
}
@@ -8149,7 +7405,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) {
* Get the next character in the source starting from +cursor+. If that position
* is beyond the end of the source then return '\0'.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
if (cursor < parser->end) {
return *cursor;
@@ -8163,7 +7419,7 @@ peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
* adding the given offset. If that position is beyond the end of the source
* then return '\0'.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
return peek_at(parser, parser->current.end + offset);
}
@@ -8172,7 +7428,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
* Get the next character in the source starting from parser->current.end. If
* that position is beyond the end of the source then return '\0'.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
peek(const pm_parser_t *parser) {
return peek_at(parser, parser->current.end);
}
@@ -8181,7 +7437,7 @@ peek(const pm_parser_t *parser) {
* If the character to be read matches the given value, then returns true and
* advances the current pointer.
*/
-static inline bool
+static PRISM_INLINE bool
match(pm_parser_t *parser, uint8_t value) {
if (peek(parser) == value) {
parser->current.end++;
@@ -8194,7 +7450,7 @@ match(pm_parser_t *parser, uint8_t value) {
* Return the length of the line ending string starting at +cursor+, or 0 if it
* is not a line ending. This function is intended to be CRLF/LF agnostic.
*/
-static inline size_t
+static PRISM_INLINE size_t
match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
if (peek_at(parser, cursor) == '\n') {
return 1;
@@ -8210,7 +7466,7 @@ match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
* `parser->current.end + offset`, or 0 if it is not a line ending. This
* function is intended to be CRLF/LF agnostic.
*/
-static inline size_t
+static PRISM_INLINE size_t
match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
return match_eol_at(parser, parser->current.end + offset);
}
@@ -8220,7 +7476,7 @@ match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
* or 0 if it is not a line ending. This function is intended to be CRLF/LF
* agnostic.
*/
-static inline size_t
+static PRISM_INLINE size_t
match_eol(pm_parser_t *parser) {
return match_eol_at(parser, parser->current.end);
}
@@ -8228,7 +7484,7 @@ match_eol(pm_parser_t *parser) {
/**
* Skip to the next newline character or NUL byte.
*/
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
assert(length >= 0);
@@ -8241,7 +7497,7 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
/**
* This is equivalent to the predicate of warn_balanced in CRuby.
*/
-static inline bool
+static PRISM_INLINE bool
ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
}
@@ -8319,7 +7575,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) {
// issue because we didn't understand the encoding that the user was
// trying to use. In this case we'll keep using the default encoding but
// add an error to the parser to indicate an unsuccessful parse.
- pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
+ pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
}
}
@@ -8344,7 +7600,7 @@ parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t valu
}
}
-static inline bool
+static PRISM_INLINE bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
return b == '\'' || b == '"' || b == ':' || b == ';';
}
@@ -8354,13 +7610,15 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
* found, it returns a pointer to the start of the marker. Otherwise it returns
* NULL.
*/
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
- while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
- if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
- return cursor;
+ // Scan for '*' as the middle character, since it is rarer than '-' in
+ // typical comments and avoids repeated memchr calls for '-' that hit
+ // dashes in words like "foo-bar".
+ while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
+ if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
+ return cursor - 1;
}
- cursor++;
}
return NULL;
}
@@ -8375,7 +7633,7 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor
* It returns true if it consumes the entire comment. Otherwise it returns
* false.
*/
-static inline bool
+static PRISM_INLINE bool
parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
bool result = true;
@@ -8397,11 +7655,24 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// have a magic comment.
return false;
}
+ } else {
+ // Non-emacs magic comments must contain a colon for `key: value`.
+ // Reject early if there is no colon to avoid scanning the entire
+ // comment character-by-character.
+ if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
+ return false;
+ }
+
+ // Advance start past leading whitespace so the main loop begins
+ // directly at the key, avoiding a redundant whitespace scan.
+ start += pm_strspn_whitespace(start, end - start);
}
cursor = start;
while (cursor < end) {
- while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+ if (indicator) {
+ while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
+ }
const uint8_t *key_start = cursor;
while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
@@ -8429,7 +7700,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
}
value_end = cursor;
- if (*cursor == '"') cursor++;
+ if (cursor < end && *cursor == '"') cursor++;
} else {
value_start = cursor;
while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
@@ -8487,7 +7758,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
(int) key_length,
(const char *) key_source,
@@ -8514,7 +7785,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
(int) key_length,
(const char *) key_source,
@@ -8549,7 +7820,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
} else {
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
(int) key_length,
(const char *) key_source,
@@ -8562,17 +7833,14 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
// When we're done, we want to free the string in case we had to
// allocate memory for it.
- pm_string_free(&key);
+ pm_string_cleanup(&key);
// Allocate a new magic comment node to append to the parser's list.
- pm_magic_comment_t *magic_comment;
- if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
- magic_comment->key_start = key_start;
- magic_comment->value_start = value_start;
- magic_comment->key_length = (uint32_t) key_length;
- magic_comment->value_length = value_length;
- pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
- }
+ pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
+ magic_comment->node.next = NULL;
+ magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
+ magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
+ pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
}
return result;
@@ -8582,85 +7850,67 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
/* Context manipulations */
/******************************************************************************/
-static bool
-context_terminator(pm_context_t context, pm_token_t *token) {
- switch (context) {
- case PM_CONTEXT_MAIN:
- case PM_CONTEXT_DEF_PARAMS:
- case PM_CONTEXT_DEFINED:
- case PM_CONTEXT_MULTI_TARGET:
- case PM_CONTEXT_TERNARY:
- case PM_CONTEXT_RESCUE_MODIFIER:
- return token->type == PM_TOKEN_EOF;
- case PM_CONTEXT_DEFAULT_PARAMS:
- return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
- case PM_CONTEXT_PREEXE:
- case PM_CONTEXT_POSTEXE:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_MODULE:
- case PM_CONTEXT_CLASS:
- case PM_CONTEXT_SCLASS:
- case PM_CONTEXT_LAMBDA_DO_END:
- case PM_CONTEXT_DEF:
- case PM_CONTEXT_BLOCK_KEYWORDS:
- return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
- case PM_CONTEXT_WHILE:
- case PM_CONTEXT_UNTIL:
- case PM_CONTEXT_ELSE:
- case PM_CONTEXT_FOR:
- case PM_CONTEXT_BEGIN_ENSURE:
- case PM_CONTEXT_BLOCK_ENSURE:
- case PM_CONTEXT_CLASS_ENSURE:
- case PM_CONTEXT_DEF_ENSURE:
- case PM_CONTEXT_LAMBDA_ENSURE:
- case PM_CONTEXT_MODULE_ENSURE:
- case PM_CONTEXT_SCLASS_ENSURE:
- return token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_LOOP_PREDICATE:
- return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
- case PM_CONTEXT_FOR_INDEX:
- return token->type == PM_TOKEN_KEYWORD_IN;
- case PM_CONTEXT_CASE_WHEN:
- return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
- case PM_CONTEXT_CASE_IN:
- return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
- case PM_CONTEXT_IF:
- case PM_CONTEXT_ELSIF:
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_UNLESS:
- return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_EMBEXPR:
- return token->type == PM_TOKEN_EMBEXPR_END;
- case PM_CONTEXT_BLOCK_BRACES:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_PARENS:
- return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
- case PM_CONTEXT_BEGIN:
- case PM_CONTEXT_BEGIN_RESCUE:
- case PM_CONTEXT_BLOCK_RESCUE:
- case PM_CONTEXT_CLASS_RESCUE:
- case PM_CONTEXT_DEF_RESCUE:
- case PM_CONTEXT_LAMBDA_RESCUE:
- case PM_CONTEXT_MODULE_RESCUE:
- case PM_CONTEXT_SCLASS_RESCUE:
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_BEGIN_ELSE:
- case PM_CONTEXT_BLOCK_ELSE:
- case PM_CONTEXT_CLASS_ELSE:
- case PM_CONTEXT_DEF_ELSE:
- case PM_CONTEXT_LAMBDA_ELSE:
- case PM_CONTEXT_MODULE_ELSE:
- case PM_CONTEXT_SCLASS_ELSE:
- return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
- case PM_CONTEXT_LAMBDA_BRACES:
- return token->type == PM_TOKEN_BRACE_RIGHT;
- case PM_CONTEXT_PREDICATE:
- return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
- case PM_CONTEXT_NONE:
- return false;
- }
+static const uint32_t context_terminators[] = {
+ [PM_CONTEXT_NONE] = 0,
+ [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
+ [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+ [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
+ [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+ [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
+ [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
+ [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
+ [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
+ [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
+ [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
+ [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
+ [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
+ [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
+ [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
+};
- return false;
+static PRISM_INLINE bool
+context_terminator(pm_context_t context, pm_token_t *token) {
+ return token->type < 32 && (context_terminators[context] & (1U << token->type));
}
/**
@@ -8699,7 +7949,7 @@ context_push(pm_parser_t *parser, pm_context_t context) {
static void
context_pop(pm_parser_t *parser) {
pm_context_node_t *prev = parser->current_context->prev;
- xfree(parser->current_context);
+ xfree_sized(parser->current_context, sizeof(pm_context_node_t));
parser->current_context = prev;
}
@@ -8761,6 +8011,7 @@ context_human(pm_context_t context) {
case PM_CONTEXT_BEGIN: return "begin statement";
case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
+ case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
case PM_CONTEXT_CASE_WHEN: return "'when' clause";
case PM_CONTEXT_CASE_IN: return "'in' clause";
case PM_CONTEXT_CLASS: return "class definition";
@@ -8821,11 +8072,11 @@ context_human(pm_context_t context) {
/* Specific token lexers */
/******************************************************************************/
-static inline void
+static PRISM_INLINE void
pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
if (invalid != NULL) {
pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
- pm_parser_err(parser, invalid, invalid + 1, diag_id);
+ pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
}
}
@@ -8936,7 +8187,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
}
- parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
break;
// 0o1111 is an octal number
@@ -8950,7 +8201,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
}
- parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
break;
// 01111 is an octal number
@@ -8964,7 +8215,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
case '6':
case '7':
parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
- parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
break;
// 0x1111 is a hexadecimal number
@@ -8978,7 +8229,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
}
- parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
break;
// 0.xxx is a float
@@ -8996,11 +8247,62 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
}
} else {
// If it didn't start with a 0, then we'll lex as far as we can into a
- // decimal number.
- parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
+ // decimal number. We compute the integer value inline to avoid
+ // re-scanning the digits later in pm_integer_parse.
+ {
+ const uint8_t *cursor = parser->current.end;
+ const uint8_t *end = parser->end;
+ uint64_t value = (uint64_t) (cursor[-1] - '0');
+
+ bool has_underscore = false;
+ bool prev_underscore = false;
+ const uint8_t *invalid = NULL;
+
+ while (cursor < end) {
+ uint8_t c = *cursor;
+ if (c >= '0' && c <= '9') {
+ if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
+ prev_underscore = false;
+ cursor++;
+ } else if (c == '_') {
+ has_underscore = true;
+ if (prev_underscore && invalid == NULL) invalid = cursor;
+ prev_underscore = true;
+ cursor++;
+ } else {
+ break;
+ }
+ }
+
+ if (has_underscore) {
+ if (prev_underscore && invalid == NULL) invalid = cursor - 1;
+ pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
+ }
+
+ if (value <= UINT32_MAX) {
+ parser->integer.value = (uint32_t) value;
+ parser->integer.lexed = true;
+ }
+
+ parser->current.end = cursor;
+ }
// Afterward, we'll lex as far as we can into an optional float suffix.
- type = lex_optional_float_suffix(parser, seen_e);
+ // Guard the function call: the vast majority of decimal numbers are
+ // plain integers, so avoid the call when the next byte cannot start a
+ // float suffix.
+ {
+ uint8_t next = peek(parser);
+ if (next == '.' || next == 'e' || next == 'E') {
+ type = lex_optional_float_suffix(parser, seen_e);
+
+ // If it turned out to be a float, the cached integer value is
+ // invalid.
+ if (type != PM_TOKEN_INTEGER) {
+ parser->integer.lexed = false;
+ }
+ }
+ }
}
// At this point we have a completed number, but we want to provide the user
@@ -9010,7 +8312,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
const uint8_t *fraction_start = parser->current.end;
const uint8_t *fraction_end = parser->current.end + 2;
fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
- pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
+ pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
}
return type;
@@ -9019,7 +8321,8 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
static pm_token_type_t
lex_numeric(pm_parser_t *parser) {
pm_token_type_t type = PM_TOKEN_INTEGER;
- parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+ parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
+ parser->integer.lexed = false;
if (parser->current.end < parser->end) {
bool seen_e = false;
@@ -9109,8 +8412,8 @@ lex_global_variable(pm_parser_t *parser) {
} while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
// $0 isn't allowed to be followed by anything.
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9146,9 +8449,9 @@ lex_global_variable(pm_parser_t *parser) {
} else {
// If we get here, then we have a $ followed by something that
// isn't recognized as a global variable.
- pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
- const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
+ pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
}
return PM_TOKEN_GLOBAL_VARIABLE;
@@ -9168,7 +8471,7 @@ lex_global_variable(pm_parser_t *parser) {
* * `type` - the expected token type
* * `modifier_type` - the expected modifier token type
*/
-static inline pm_token_type_t
+static PRISM_INLINE pm_token_type_t
lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
if (memcmp(current_start, value, vlen) == 0) {
pm_lex_state_t last_state = parser->lex_state;
@@ -9207,6 +8510,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
current_end += width;
}
} else {
+ // Fast path: scan ASCII identifier bytes using wide operations.
+ current_end += scan_identifier_ascii(current_end, end);
+
+ // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
current_end += width;
}
@@ -9266,9 +8573,15 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
switch (width) {
case 2:
if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
+ if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
+ return PM_TOKEN_KEYWORD_DO;
+ }
if (pm_do_loop_stack_p(parser)) {
return PM_TOKEN_KEYWORD_DO_LOOP;
}
+ if (!pm_accepts_block_stack_p(parser)) {
+ return PM_TOKEN_KEYWORD_DO_BLOCK;
+ }
return PM_TOKEN_KEYWORD_DO;
}
@@ -9347,8 +8660,8 @@ current_token_starts_line(pm_parser_t *parser) {
* handle interpolation. This function performs that check. It returns a token
* type representing what it found. Those cases are:
*
- * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The
- * caller should keep lexing.
+ * * 0 - No interpolation was found at this point. The caller should keep
+ * lexing.
* * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point. The
* caller should return this token type.
* * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller
@@ -9365,9 +8678,9 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
return PM_TOKEN_STRING_CONTENT;
}
- // Now we'll check against the character that follows the #. If it constitutes
- // valid interplation, we'll handle that, otherwise we'll return
- // PM_TOKEN_NOT_PROVIDED.
+ // Now we'll check against the character that follows the #. If it
+ // constitutes valid interpolation, we'll handle that, otherwise we'll return
+ // 0.
switch (pound[1]) {
case '@': {
// In this case we may have hit an embedded instance or class variable.
@@ -9401,7 +8714,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// string content. This is like if we get "#@-". In this case the caller
// should keep lexing.
parser->current.end = pound + 1;
- return PM_TOKEN_NOT_PROVIDED;
+ return 0;
}
case '$':
// In this case we may have hit an embedded global variable. If there's
@@ -9451,7 +8764,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// In this case we've hit a #$ that does not indicate a global variable.
// In this case we'll continue lexing past it.
parser->current.end = pound + 1;
- return PM_TOKEN_NOT_PROVIDED;
+ return 0;
case '{':
// In this case it's the start of an embedded expression. If we have
// already consumed content, then we need to return that content as string
@@ -9475,7 +8788,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
// mark that by returning the not provided token type. This tells the
// consumer to keep lexing forward.
parser->current.end = pound + 1;
- return PM_TOKEN_NOT_PROVIDED;
+ return 0;
}
}
@@ -9499,7 +8812,7 @@ static const bool ascii_printable_chars[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
-static inline bool
+static PRISM_INLINE bool
char_is_ascii_printable(const uint8_t b) {
return (b < 0x80) && ascii_printable_chars[b];
}
@@ -9508,7 +8821,7 @@ char_is_ascii_printable(const uint8_t b) {
* Return the value that a hexadecimal digit character represents. For example,
* transform 'a' into 10, 'b' into 11, etc.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
escape_hexadecimal_digit(const uint8_t value) {
return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
}
@@ -9518,8 +8831,8 @@ escape_hexadecimal_digit(const uint8_t value) {
* digits scanned. This function assumes that the characters have already been
* validated.
*/
-static inline uint32_t
-escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
+static PRISM_INLINE uint32_t
+escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
uint32_t value = 0;
for (size_t index = 0; index < length; index++) {
if (index != 0) value <<= 4;
@@ -9529,7 +8842,14 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
// Here we're going to verify that the value is actually a valid Unicode
// codepoint and not a surrogate pair.
if (value >= 0xD800 && value <= 0xDFFF) {
- pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, defer the error to regexp encoding
+ // validation where we can produce a regexp-specific message.
+ } else if (error_location != NULL) {
+ pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
+ } else {
+ pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
return 0xFFFD;
}
@@ -9539,7 +8859,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
/**
* Escape a single character value based on the given flags.
*/
-static inline uint8_t
+static PRISM_INLINE uint8_t
escape_byte(uint8_t value, const uint8_t flags) {
if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
@@ -9549,21 +8869,32 @@ escape_byte(uint8_t value, const uint8_t flags) {
/**
* Write a unicode codepoint to the given buffer.
*/
-static inline void
+static PRISM_INLINE void
escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
// \u escape sequences in string-like structures implicitly change the
// encoding to UTF-8 if they are >= 0x80 or if they are used in a character
// literal.
if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
- PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, suppress this error — the regexp encoding
+ // validation will produce a more specific error message.
+ } else {
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
+ }
}
parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
}
if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
- pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, defer the error to the regexp encoding
+ // validation which produces a regexp-specific message.
+ } else {
+ pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
+ }
+
pm_buffer_append_byte(buffer, 0xEF);
pm_buffer_append_byte(buffer, 0xBF);
pm_buffer_append_byte(buffer, 0xBD);
@@ -9574,11 +8905,16 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla
* When you're writing a byte to the unescape buffer, if the byte is non-ASCII
* (i.e., the top bit is set) then it locks in the encoding.
*/
-static inline void
-escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
+static PRISM_INLINE void
+escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
if (byte >= 0x80) {
if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ // In regexp context, suppress this error — the regexp encoding
+ // validation will produce a more specific error message.
+ } else {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+ }
}
parser->explicit_encoding = parser->encoding;
@@ -9602,19 +8938,19 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
* Note that in this case there is a literal \ byte in the regular expression
* source so that the regular expression engine will perform its own unescaping.
*/
-static inline void
+static PRISM_INLINE void
escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
}
- escape_write_byte_encoded(parser, buffer, byte);
+ escape_write_byte_encoded(parser, buffer, flags, byte);
}
/**
* Write each byte of the given escaped character into the buffer.
*/
-static inline void
+static PRISM_INLINE void
escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
size_t width;
if (parser->encoding_changed) {
@@ -9624,6 +8960,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_
}
if (width == 1) {
+ if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
} else if (width > 1) {
// Valid multibyte character. Just ignore escape.
@@ -9649,7 +8986,7 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
PM_PARSER_WARN_TOKEN_FORMAT(
parser,
- parser->current,
+ &parser->current,
PM_WARN_INVALID_CHARACTER,
FLAG(flags),
FLAG(flag),
@@ -9764,7 +9101,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
}
- escape_write_byte_encoded(parser, buffer, value);
+ escape_write_byte_encoded(parser, buffer, flags, value);
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
}
@@ -9777,7 +9114,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (parser->current.end == parser->end) {
const uint8_t *start = parser->current.end - 2;
- PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
} else if (peek(parser) == '{') {
const uint8_t *unicode_codepoints_start = parser->current.end - 2;
parser->current.end++;
@@ -9806,18 +9143,19 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (hexadecimal_length > 6) {
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
- pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
+ pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
} else if (hexadecimal_length == 0) {
// there are not hexadecimal characters
if (flags & PM_ESCAPE_FLAG_REGEXP) {
// If this is a regular expression, we are going to
// let the regular expression engine handle this
- // error instead of us.
+ // error instead of us because we don't know at this
+ // point if we're inside a comment in /x mode.
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
} else {
- pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
- pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
+ pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
}
return;
@@ -9829,7 +9167,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
extra_codepoints_start = unicode_start;
}
- uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
+ uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
@@ -9838,21 +9176,22 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
// ?\u{nnnn} character literal should contain only one codepoint
// and cannot be like ?\u{nnnn mmmm}.
if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
- pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
+ pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
}
if (parser->current.end == parser->end) {
- PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
} else if (peek(parser) == '}') {
parser->current.end++;
} else {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
// If this is a regular expression, we are going to let
// the regular expression engine handle this error
- // instead of us.
+ // instead of us because we don't know at this point if
+ // we're inside a comment in /x mode.
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
} else {
- pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+ pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
}
}
@@ -9867,10 +9206,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
} else {
const uint8_t *start = parser->current.end - 2;
- PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
+ PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
}
} else if (length == 4) {
- uint32_t value = escape_unicode(parser, parser->current.end, 4);
+ uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
@@ -9916,7 +9255,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
if (match(parser, 'u') || match(parser, 'U')) {
- pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
@@ -9938,6 +9277,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
return;
}
+ if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
@@ -9952,7 +9292,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (peek(parser) != '-') {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
@@ -9973,7 +9313,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
if (match(parser, 'u') || match(parser, 'U')) {
- pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
@@ -9992,10 +9332,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
default: {
if (!char_is_ascii_printable(peeked)) {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
return;
}
+ if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
@@ -10010,7 +9351,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
if (peek(parser) != '-') {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
return;
}
@@ -10026,7 +9367,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
parser->current.end++;
if (match(parser, 'u') || match(parser, 'U')) {
- pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
return;
}
@@ -10045,10 +9386,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
default:
if (!char_is_ascii_printable(peeked)) {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
return;
}
+ if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
return;
@@ -10056,8 +9398,9 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
case '\r': {
if (peek_offset(parser, 1) == '\n') {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
parser->current.end += 2;
- escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
+ escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
return;
}
PRISM_FALLTHROUGH
@@ -10065,7 +9408,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
default: {
if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
- pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
return;
}
if (parser->current.end < parser->end) {
@@ -10127,10 +9470,14 @@ lex_question_mark(pm_parser_t *parser) {
lex_state_set(parser, PM_LEX_STATE_END);
pm_buffer_t buffer;
- pm_buffer_init_capacity(&buffer, 3);
+ pm_buffer_init(&buffer, 3);
escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
- pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
+
+ // Copy buffer data into the arena and free the heap buffer.
+ void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
+ pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
+ pm_buffer_cleanup(&buffer);
return PM_TOKEN_CHARACTER_LITERAL;
} else {
@@ -10173,12 +9520,12 @@ lex_at_variable(pm_parser_t *parser) {
}
} else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
}
size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
} else {
pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
pm_parser_err_token(parser, &parser->current, diag_id);
@@ -10196,24 +9543,23 @@ lex_at_variable(pm_parser_t *parser) {
/**
* Optionally call out to the lex callback if one is provided.
*/
-static inline void
+static PRISM_INLINE void
parser_lex_callback(pm_parser_t *parser) {
- if (parser->lex_callback) {
- parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
+ if (parser->lex_callback.callback) {
+ parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data);
}
}
/**
* Return a new comment node of the specified type.
*/
-static inline pm_comment_t *
+static PRISM_INLINE pm_comment_t *
parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
- pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
- if (comment == NULL) return NULL;
+ pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
*comment = (pm_comment_t) {
.type = type,
- .location = { parser->current.start, parser->current.end }
+ .location = TOK2LOC(parser, &parser->current)
};
return comment;
@@ -10232,7 +9578,7 @@ lex_embdoc(pm_parser_t *parser) {
if (newline == NULL) {
parser->current.end = parser->end;
} else {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
parser->current.end = newline + 1;
}
@@ -10240,8 +9586,8 @@ lex_embdoc(pm_parser_t *parser) {
parser_lex_callback(parser);
// Now, create a comment that is going to be attached to the parser.
+ const uint8_t *comment_start = parser->current.start;
pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
- if (comment == NULL) return PM_TOKEN_EOF;
// Now, loop until we find the end of the embedded documentation or the end
// of the file.
@@ -10265,14 +9611,14 @@ lex_embdoc(pm_parser_t *parser) {
if (newline == NULL) {
parser->current.end = parser->end;
} else {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
parser->current.end = newline + 1;
}
parser->current.type = PM_TOKEN_EMBDOC_END;
parser_lex_callback(parser);
- comment->location.end = parser->current.end;
+ comment->location.length = (uint32_t) (parser->current.end - comment_start);
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
return PM_TOKEN_EMBDOC_END;
@@ -10285,7 +9631,7 @@ lex_embdoc(pm_parser_t *parser) {
if (newline == NULL) {
parser->current.end = parser->end;
} else {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
parser->current.end = newline + 1;
}
@@ -10295,7 +9641,7 @@ lex_embdoc(pm_parser_t *parser) {
pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
- comment->location.end = parser->current.end;
+ comment->location.length = (uint32_t) (parser->current.end - comment_start);
pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
return PM_TOKEN_EOF;
@@ -10306,7 +9652,7 @@ lex_embdoc(pm_parser_t *parser) {
* This happens in a couple places depending on whether or not we have already
* lexed a comment.
*/
-static inline void
+static PRISM_INLINE void
parser_lex_ignored_newline(pm_parser_t *parser) {
parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
parser_lex_callback(parser);
@@ -10321,7 +9667,7 @@ parser_lex_ignored_newline(pm_parser_t *parser) {
* If it is set, then we need to skip past the heredoc body and then clear the
* heredoc_end field.
*/
-static inline void
+static PRISM_INLINE void
parser_flush_heredoc_end(pm_parser_t *parser) {
assert(parser->heredoc_end <= parser->end);
parser->next_start = parser->heredoc_end;
@@ -10397,12 +9743,12 @@ typedef struct {
/**
* Push the given byte into the token buffer.
*/
-static inline void
+static PRISM_INLINE void
pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_byte(&token_buffer->buffer, byte);
}
-static inline void
+static PRISM_INLINE void
pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
}
@@ -10410,7 +9756,7 @@ pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t
/**
* Return the width of the character at the end of the current token.
*/
-static inline size_t
+static PRISM_INLINE size_t
parser_char_width(const pm_parser_t *parser) {
size_t width;
if (parser->encoding_changed) {
@@ -10437,36 +9783,31 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
static void
pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
size_t width = parser_char_width(parser);
- pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
- pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
+ const uint8_t *start = parser->current.end;
+ pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
parser->current.end += width;
}
-static bool
-pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
- for (size_t index = 0; index < length; index++) {
- if (value[index] & 0x80) return false;
- }
-
- return true;
-}
-
/**
* When we're about to return from lexing the current token and we know for sure
* that we have found an escape sequence, this function is called to copy the
* contents of the token buffer into the current string on the parser so that it
* can be attached to the correct node.
*/
-static inline void
+static PRISM_INLINE void
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
- pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
+ // Copy buffer data into the arena and free the heap buffer.
+ size_t len = pm_buffer_length(&token_buffer->buffer);
+ void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
+ pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
+ pm_buffer_cleanup(&token_buffer->buffer);
}
-static inline void
+static PRISM_INLINE void
pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
- pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
- parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
- pm_buffer_free(&token_buffer->regexp_buffer);
+ pm_token_buffer_copy(parser, &token_buffer->base);
+ pm_buffer_cleanup(&token_buffer->regexp_buffer);
}
/**
@@ -10492,10 +9833,11 @@ static void
pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
if (token_buffer->base.cursor == NULL) {
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
- parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
} else {
- pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
- pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
+ const uint8_t *cursor = token_buffer->base.cursor;
+ size_t length = (size_t) (parser->current.end - cursor);
+ pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
+ pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
pm_regexp_token_buffer_copy(parser, token_buffer);
}
}
@@ -10514,7 +9856,7 @@ static void
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->cursor == NULL) {
- pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
start = parser->current.start;
} else {
start = token_buffer->cursor;
@@ -10531,8 +9873,8 @@ static void
pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->base.cursor == NULL) {
- pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
- pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
+ pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
start = parser->current.start;
} else {
start = token_buffer->base.cursor;
@@ -10551,7 +9893,7 @@ pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *tok
* Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
* a tilde heredoc expands out tab characters to the nearest tab boundaries.
*/
-static inline size_t
+static PRISM_INLINE size_t
pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
size_t whitespace = 0;
@@ -10599,7 +9941,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
parser_flush_heredoc_end(parser);
} else {
// Otherwise, we'll add the newline to the list of newlines.
- pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
}
uint8_t delimiter = *parser->current.end;
@@ -10647,6 +9989,12 @@ parser_lex(pm_parser_t *parser) {
unsigned int semantic_token_seen = parser->semantic_token_seen;
parser->semantic_token_seen = true;
+ // We'll jump to this label when we are about to encounter an EOF.
+ // If we still have lex_modes on the stack, we pop them so that cleanup
+ // can happen. For example, we should still continue parsing after a heredoc
+ // identifier, even if the heredoc body was syntactically invalid.
+ switch_lex_modes:
+
switch (parser->lex_modes.current->mode) {
case PM_LEX_DEFAULT:
case PM_LEX_EMBEXPR:
@@ -10669,22 +10017,29 @@ parser_lex(pm_parser_t *parser) {
bool space_seen = false;
// First, we're going to skip past any whitespace at the front of the next
- // token.
+ // token. Skip runs of inline whitespace in bulk to avoid per-character
+ // stores back to parser->current.end.
bool chomping = true;
while (parser->current.end < parser->end && chomping) {
- switch (*parser->current.end) {
- case ' ':
- case '\t':
- case '\f':
- case '\v':
- parser->current.end++;
+ {
+ static const uint8_t inline_whitespace[256] = {
+ [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
+ };
+ const uint8_t *scan = parser->current.end;
+ while (scan < parser->end && inline_whitespace[*scan]) scan++;
+ if (scan > parser->current.end) {
+ parser->current.end = scan;
space_seen = true;
- break;
+ continue;
+ }
+ }
+
+ switch (*parser->current.end) {
case '\r':
if (match_eol_offset(parser, 1)) {
chomping = false;
} else {
- pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
+ pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
parser->current.end++;
space_seen = true;
}
@@ -10697,7 +10052,7 @@ parser_lex(pm_parser_t *parser) {
parser->heredoc_end = NULL;
} else {
parser->current.end += eol_length + 1;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
space_seen = true;
}
} else if (pm_char_is_inline_whitespace(*parser->current.end)) {
@@ -10720,6 +10075,14 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
+ // We may be missing closing tokens. We should pop modes one by one
+ // to do the appropriate cleanup like moving next_start for heredocs.
+ // Only when no mode is remaining will we actually emit the EOF token.
+ if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+ lex_mode_pop(parser);
+ goto switch_lex_modes;
+ }
+
// If we hit EOF, but the EOF came immediately after a newline,
// set the start of the token to the newline. This way any EOF
// errors will be reported as happening on that line rather than
@@ -10791,7 +10154,7 @@ parser_lex(pm_parser_t *parser) {
}
if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
}
}
@@ -10849,14 +10212,50 @@ parser_lex(pm_parser_t *parser) {
following = next_newline(following, parser->end - following);
}
- // If the lex state was ignored, or we hit a '.' or a '&.',
- // we will lex the ignored newline
+ // If the lex state was ignored, we will lex the
+ // ignored newline.
+ if (lex_state_ignored_p(parser)) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lexed_comment = false;
+ goto lex_next_token;
+ }
+
+ // If we hit a '.' or a '&.' we will lex the ignored
+ // newline.
+ if (following && (
+ (peek_at(parser, following) == '.') ||
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
+ )) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lexed_comment = false;
+ goto lex_next_token;
+ }
+
+
+ // If we are parsing as CRuby 4.0 or later and we
+ // hit a '&&' or a '||' then we will lex the ignored
+ // newline.
if (
- lex_state_ignored_p(parser) ||
- (following && (
- (peek_at(parser, following) == '.') ||
- (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
- ))
+ (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
+ following && (
+ (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
+ (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
+ (
+ // `and` keyword: the byte after it must not be `!` or `?` and must
+ // not continue an identifier. Every look-ahead here is relative to
+ // `following`, the same cursor the keyword letters are read from.
+ peek_at(parser, following) == 'a' &&
+ peek_at(parser, following + 1) == 'n' &&
+ peek_at(parser, following + 2) == 'd' &&
+ peek_at(parser, following + 3) != '!' &&
+ peek_at(parser, following + 3) != '?' &&
+ !char_is_identifier(parser, following + 3, parser->end - (following + 3))
+ ) ||
+ (
+ // `or` keyword, with the same suffix checks relative to `following`.
+ peek_at(parser, following) == 'o' &&
+ peek_at(parser, following + 1) == 'r' &&
+ peek_at(parser, following + 2) != '!' &&
+ peek_at(parser, following + 2) != '?' &&
+ !char_is_identifier(parser, following + 2, parser->end - (following + 2))
+ )
+ )
) {
if (!lexed_comment) parser_lex_ignored_newline(parser);
lexed_comment = false;
@@ -10896,6 +10295,67 @@ parser_lex(pm_parser_t *parser) {
parser->next_start = NULL;
LEX(PM_TOKEN_AMPERSAND_DOT);
}
+
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+ // If we hit an && then we are in a logical chain
+ // and we need to return the logical operator.
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
+ }
+
+ // If we hit a || then we are in a logical chain and
+ // we need to return the logical operator.
+ if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ LEX(PM_TOKEN_PIPE_PIPE);
+ }
+
+ // If we hit an 'and' then we are in a logical chain
+ // and we need to return the logical operator.
+ if (
+ peek_at(parser, next_content) == 'a' &&
+ peek_at(parser, next_content + 1) == 'n' &&
+ peek_at(parser, next_content + 2) == 'd' &&
+ peek_at(parser, next_content + 3) != '!' &&
+ peek_at(parser, next_content + 3) != '?' &&
+ !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
+ ) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 3;
+ parser->next_start = NULL;
+ parser->command_start = true;
+ LEX(PM_TOKEN_KEYWORD_AND);
+ }
+
+ // If we hit an 'or' then we are in a logical chain
+ // and we need to return the logical operator.
+ if (
+ peek_at(parser, next_content) == 'o' &&
+ peek_at(parser, next_content + 1) == 'r' &&
+ peek_at(parser, next_content + 2) != '!' &&
+ peek_at(parser, next_content + 2) != '?' &&
+ !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
+ ) {
+ if (!lexed_comment) parser_lex_ignored_newline(parser);
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->current.start = next_content;
+ parser->current.end = next_content + 2;
+ parser->next_start = NULL;
+ parser->command_start = true;
+ LEX(PM_TOKEN_KEYWORD_OR);
+ }
+ }
}
// At this point we know this is a regular newline, and we can set the
@@ -10910,7 +10370,7 @@ parser_lex(pm_parser_t *parser) {
// ,
case ',':
if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
}
lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
@@ -11036,7 +10496,7 @@ parser_lex(pm_parser_t *parser) {
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR_STAR;
} else if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -11061,7 +10521,7 @@ parser_lex(pm_parser_t *parser) {
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_USTAR;
} else if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -11187,7 +10647,7 @@ parser_lex(pm_parser_t *parser) {
bool ident_error = false;
if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
- pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
+ pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
ident_error = true;
}
@@ -11220,7 +10680,7 @@ parser_lex(pm_parser_t *parser) {
} else {
// Otherwise, we want to indicate that the body of the
// heredoc starts on the character after the next newline.
- pm_newline_list_append(&parser->newline_list, body_start);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
body_start++;
}
@@ -11239,7 +10699,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
}
if (lex_state_operator_p(parser)) {
@@ -11365,7 +10825,7 @@ parser_lex(pm_parser_t *parser) {
} else if (lex_state_beg_p(parser)) {
type = PM_TOKEN_UAMPERSAND;
} else if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
}
if (lex_state_operator_p(parser)) {
@@ -11441,7 +10901,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
}
lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -11482,7 +10942,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
}
lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -11581,7 +11041,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
}
if (lex_state_operator_p(parser)) {
@@ -11766,7 +11226,7 @@ parser_lex(pm_parser_t *parser) {
}
if (ambiguous_operator_p(parser, space_seen)) {
- PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
+ PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
}
lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
@@ -11802,40 +11262,40 @@ parser_lex(pm_parser_t *parser) {
// token after adding an appropriate error message.
if (!width) {
if (*parser->current.start >= 0x80) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
} else if (*parser->current.start == '\\') {
switch (peek_at(parser, parser->current.start + 1)) {
case ' ':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
break;
case '\f':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
break;
case '\t':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
break;
case '\v':
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
break;
case '\r':
if (peek_at(parser, parser->current.start + 2) != '\n') {
parser->current.end++;
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
break;
}
PRISM_FALLTHROUGH
default:
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
break;
}
} else if (char_is_ascii_printable(*parser->current.start)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
} else {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
}
goto lex_next_token;
@@ -11861,15 +11321,15 @@ parser_lex(pm_parser_t *parser) {
// correct column information for it.
const uint8_t *cursor = parser->current.end;
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
- pm_newline_list_append(&parser->newline_list, cursor++);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
}
parser->current.end = parser->end;
parser->current.type = PM_TOKEN___END__;
parser_lex_callback(parser);
- parser->data_loc.start = parser->current.start;
- parser->data_loc.end = parser->current.end;
+ parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
+ parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
LEX(PM_TOKEN_EOF);
}
@@ -11894,7 +11354,7 @@ parser_lex(pm_parser_t *parser) {
!(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
(type == PM_TOKEN_IDENTIFIER) &&
((pm_parser_local_depth(parser, &parser->current) != -1) ||
- pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
+ pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
) {
lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
}
@@ -11922,7 +11382,7 @@ parser_lex(pm_parser_t *parser) {
whitespace += 1;
}
} else {
- whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
+ whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
}
if (whitespace > 0) {
@@ -12037,7 +11497,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
}
parser->current.end++;
@@ -12065,7 +11525,7 @@ parser_lex(pm_parser_t *parser) {
if (*breakpoint == '#') {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had something
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
@@ -12170,7 +11630,13 @@ parser_lex(pm_parser_t *parser) {
size_t eol_length = match_eol_at(parser, breakpoint);
if (eol_length) {
parser->current.end = breakpoint + eol_length;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+ // Track the newline if we're not in a heredoc that
+ // would already have added the newline to the
+ // list.
+ if (parser->heredoc_end == NULL) {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+ }
} else {
parser->current.end = breakpoint + 1;
}
@@ -12216,7 +11682,7 @@ parser_lex(pm_parser_t *parser) {
// If we've hit a newline, then we need to track that in
// the list of newlines.
if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
break;
@@ -12264,7 +11730,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
}
parser->current.end++;
@@ -12311,7 +11777,7 @@ parser_lex(pm_parser_t *parser) {
// interpolation.
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had
// something that looked like an interpolated class or
// instance variable like "#@" but wasn't actually. In
@@ -12424,7 +11890,13 @@ parser_lex(pm_parser_t *parser) {
size_t eol_length = match_eol_at(parser, breakpoint);
if (eol_length) {
parser->current.end = breakpoint + eol_length;
- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
+
+ // Track the newline if we're not in a heredoc that
+ // would already have added the newline to the
+ // list.
+ if (parser->heredoc_end == NULL) {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
+ }
} else {
parser->current.end = breakpoint + 1;
}
@@ -12436,6 +11908,13 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_LABEL_END);
}
+ // When the delimiter itself is a newline, we won't
+ // get a chance to flush heredocs in the usual places since
+ // the newline is already consumed.
+ if (term == '\n' && parser->heredoc_end) {
+ parser_flush_heredoc_end(parser);
+ }
+
lex_state_set(parser, PM_LEX_STATE_END);
lex_mode_pop(parser);
LEX(PM_TOKEN_STRING_END);
@@ -12468,7 +11947,7 @@ parser_lex(pm_parser_t *parser) {
// for the terminator in case the terminator is a
// newline character.
if (parser->heredoc_end == NULL) {
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
break;
@@ -12522,7 +12001,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
- pm_newline_list_append(&parser->newline_list, parser->current.end);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
}
parser->current.end++;
@@ -12551,7 +12030,7 @@ parser_lex(pm_parser_t *parser) {
case '#': {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had something that
// looked like an interpolated class or instance variable like "#@"
// but wasn't actually. In this case we'll just skip to the next
@@ -12651,7 +12130,7 @@ parser_lex(pm_parser_t *parser) {
(memcmp(terminator_start, ident_start, ident_length) == 0)
) {
if (newline != NULL) {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
}
parser->current.end = terminator_end;
@@ -12682,7 +12161,7 @@ parser_lex(pm_parser_t *parser) {
// Otherwise we'll be parsing string content. These are the places
// where we need to split up the content of the heredoc. We'll use
// strpbrk to find the first of these characters.
- uint8_t breakpoints[] = "\r\n\\#";
+ uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
if (quote == PM_HEREDOC_QUOTE_SINGLE) {
@@ -12723,7 +12202,7 @@ parser_lex(pm_parser_t *parser) {
LEX(PM_TOKEN_STRING_CONTENT);
}
- pm_newline_list_append(&parser->newline_list, breakpoint);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
// If we have a - or ~ heredoc, then we can match after
// some leading whitespace.
@@ -12841,7 +12320,10 @@ parser_lex(pm_parser_t *parser) {
// string content.
if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
const uint8_t *end = parser->current.end;
- pm_newline_list_append(&parser->newline_list, end);
+
+ if (parser->heredoc_end == NULL) {
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
+ }
// Here we want the buffer to only
// include up to the backslash.
@@ -12872,7 +12354,7 @@ parser_lex(pm_parser_t *parser) {
case '#': {
pm_token_type_t type = lex_interpolation(parser, breakpoint);
- if (type == PM_TOKEN_NOT_PROVIDED) {
+ if (!type) {
// If we haven't returned at this point then we had
// something that looked like an interpolated class
// or instance variable like "#@" but wasn't
@@ -13097,7 +12579,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
/**
* Returns true if the current token is of the given type.
*/
-static inline bool
+static PRISM_INLINE bool
match1(const pm_parser_t *parser, pm_token_type_t type) {
return parser->current.type == type;
}
@@ -13105,7 +12587,7 @@ match1(const pm_parser_t *parser, pm_token_type_t type) {
/**
* Returns true if the current token is of either of the given types.
*/
-static inline bool
+static PRISM_INLINE bool
match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
return match1(parser, type1) || match1(parser, type2);
}
@@ -13113,7 +12595,7 @@ match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2)
/**
* Returns true if the current token is any of the three given types.
*/
-static inline bool
+static PRISM_INLINE bool
match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
}
@@ -13121,15 +12603,23 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
/**
* Returns true if the current token is any of the four given types.
*/
-static inline bool
+static PRISM_INLINE bool
match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
}
/**
+ * Returns true if the current token is any of the six given types.
+ */
+static PRISM_INLINE bool
+match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
+}
+
+/**
* Returns true if the current token is any of the seven given types.
*/
-static inline bool
+static PRISM_INLINE bool
match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
}
@@ -13137,20 +12627,12 @@ match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
/**
* Returns true if the current token is any of the eight given types.
*/
-static inline bool
+static PRISM_INLINE bool
match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
}
/**
- * Returns true if the current token is any of the nine given types.
- */
-static inline bool
-match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
-}
-
-/**
* If the current token is of the specified type, lex forward by one token and
* return true. Otherwise, return false. For example:
*
@@ -13169,7 +12651,7 @@ accept1(pm_parser_t *parser, pm_token_type_t type) {
* If the current token is either of the two given types, lex forward by one
* token and return true. Otherwise return false.
*/
-static inline bool
+static PRISM_INLINE bool
accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
if (match2(parser, type1, type2)) {
parser_lex(parser);
@@ -13194,10 +12676,10 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
if (accept1(parser, type)) return;
const uint8_t *location = parser->previous.end;
- pm_parser_err(parser, location, location, diag_id);
+ pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
parser->previous.start = location;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
/**
@@ -13209,10 +12691,10 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
if (accept2(parser, type1, type2)) return;
const uint8_t *location = parser->previous.end;
- pm_parser_err(parser, location, location, diag_id);
+ pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
parser->previous.start = location;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
/**
@@ -13226,20 +12708,43 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide
} else {
pm_parser_err_heredoc_term(parser, ident_start, ident_length);
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
}
+/**
+ * A special expect1 that attaches the error to the opening token location
+ * rather than the current position. This is useful for errors about missing
+ * closing tokens, where we want to point to the line with the opening token
+ * (e.g., `def`, `class`, `if`, `{`) rather than the end of the file.
+ *
+ * If the current token has the expected type it is consumed through accept1
+ * and nothing is reported. Otherwise the diagnostic is recorded with a start
+ * offset and length that cover the opening token, and the previous token is
+ * collapsed to zero width at its own end with its type set to 0, mirroring
+ * what expect1 does when the expected token is absent.
+ */
+static void
+expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
+ if (accept1(parser, type)) return;
+
+ const uint8_t *start = opening->start;
+ pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
+
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = 0;
+}
+
+/** Flags for controlling expression parsing behavior. */
+#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) 0x1)
+#define PM_PARSE_ACCEPTS_LABEL ((uint8_t) 0x2)
+#define PM_PARSE_ACCEPTS_DO_BLOCK ((uint8_t) 0x4)
+#define PM_PARSE_IN_ENDLESS_DEF ((uint8_t) 0x8)
+
static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
/**
* This is a wrapper of parse_expression, which also checks whether the
* resulting node is a value expression.
*/
static pm_node_t *
-parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
- pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
pm_assert_value_expression(parser, node);
return node;
}
@@ -13262,7 +12767,7 @@ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bo
* work in all cases, it may need to be refactored later. But it appears to work
* for now.
*/
-static inline bool
+static PRISM_INLINE bool
token_begins_expression_p(pm_token_type_t type) {
switch (type) {
case PM_TOKEN_EQUAL_GREATER:
@@ -13278,6 +12783,7 @@ token_begins_expression_p(pm_token_type_t type) {
case PM_TOKEN_EOF:
case PM_TOKEN_LAMBDA_BEGIN:
case PM_TOKEN_KEYWORD_DO:
+ case PM_TOKEN_KEYWORD_DO_BLOCK:
case PM_TOKEN_KEYWORD_DO_LOOP:
case PM_TOKEN_KEYWORD_END:
case PM_TOKEN_KEYWORD_ELSE:
@@ -13323,14 +12829,89 @@ token_begins_expression_p(pm_token_type_t type) {
* prefixed by the * operator.
*/
static pm_node_t *
-parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ return UP(pm_splat_node_create(parser, &operator, expression));
}
- return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
+ return parse_value_expression(parser, binding_power, flags, diag_id, depth);
+}
+
+static bool
+pm_node_unreference_each(const pm_node_t *node, void *data) {
+ switch (PM_NODE_TYPE(node)) {
+ /* Per-node callback for pm_visit_node; data is the pm_parser_t *. When a
+ * set of nodes that could contain block exits for the current scope is
+ * being discarded, drop each one found in the list of block exits.
+ */
+ case PM_BREAK_NODE:
+ case PM_NEXT_NODE:
+ case PM_REDO_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ size_t index = 0;
+
+ while (index < parser->current_block_exits->size) {
+ pm_node_t *block_exit = parser->current_block_exits->nodes[index];
+
+ if (block_exit == node) {
+ if (index + 1 < parser->current_block_exits->size) {
+ memmove(
+ &parser->current_block_exits->nodes[index],
+ &parser->current_block_exits->nodes[index + 1],
+ (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
+ );
+ }
+ parser->current_block_exits->size--;
+
+ /* Still return true after a match, because these nodes could
+ * have arguments that are themselves block exits. */
+ return true;
+ }
+
+ index++;
+ }
+
+ return true;
+ }
+ /* When an implicit local variable is written to or targeted, it becomes
+ * a regular, named local variable. This branch removes it from the list
+ * of implicit parameters of the current scope when that happens. */
+ case PM_LOCAL_VARIABLE_READ_NODE:
+ case PM_IT_LOCAL_VARIABLE_READ_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
+
+ for (size_t index = 0; index < implicit_parameters->size; index++) {
+ if (implicit_parameters->nodes[index] == node) {
+ /* If the node is not the last one in the list, shift the
+ * remaining nodes down to fill the gap (memmove, because the
+ * two ranges overlap). This is extremely unlikely to happen. */
+ if (index != implicit_parameters->size - 1) {
+ memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
+ }
+
+ implicit_parameters->size--;
+ break;
+ }
+ }
+
+ return false;
+ }
+ default:
+ return true;
+ }
+}
+
+/**
+ * Call this before discarding a set of nodes that could be referenced by one or
+ * more lists on the parser. It walks node with pm_node_unreference_each, which
+ * removes such nodes from those lists; it does not destroy any node itself.
+ */
+static void
+pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
+ pm_visit_node(node, pm_node_unreference_each, parser);
}
/**
@@ -13345,16 +12926,12 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
// append an =.
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
size_t length = constant->length;
- uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
- if (name == NULL) return;
+ uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
memcpy(name, constant->start, length);
name[length] = '=';
- // Now switch the name to the new string.
- // This silences clang analyzer warning about leak of memory pointed by `name`.
- // NOLINTNEXTLINE(clang-analyzer-*)
- *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
+ *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
}
/**
@@ -13376,35 +12953,10 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
default: break;
}
- pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
+ pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
- pm_node_destroy(parser, target);
- return (pm_node_t *) result;
-}
-
-/**
- * When an implicit local variable is written to or targeted, it becomes a
- * regular, named local variable. This function removes it from the list of
- * implicit parameters when that happens.
- */
-static void
-parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
- pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
-
- for (size_t index = 0; index < implicit_parameters->size; index++) {
- if (implicit_parameters->nodes[index] == node) {
- // If the node is not the last one in the list, we need to shift the
- // remaining nodes down to fill the gap. This is extremely unlikely
- // to happen.
- if (index != implicit_parameters->size - 1) {
- memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
- }
-
- implicit_parameters->size--;
- break;
- }
- }
+ return UP(result);
}
/**
@@ -13418,7 +12970,7 @@ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
static pm_node_t *
parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
switch (PM_NODE_TYPE(target)) {
- case PM_MISSING_NODE:
+ case PM_ERROR_RECOVERY_NODE:
return target;
case PM_SOURCE_ENCODING_NODE:
case PM_FALSE_NODE:
@@ -13456,15 +13008,15 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
case PM_BACK_REFERENCE_READ_NODE:
case PM_NUMBERED_REFERENCE_READ_NODE:
PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
- return target;
+ return UP(pm_error_recovery_node_create_unexpected(parser, target));
case PM_GLOBAL_VARIABLE_READ_NODE:
assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
return target;
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
- parse_target_implicit_parameter(parser, target);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
+ pm_node_unreference(parser, target);
}
const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
@@ -13479,10 +13031,9 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
- pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
+ pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
- parse_target_implicit_parameter(parser, target);
- pm_node_destroy(parser, target);
+ pm_node_unreference(parser, target);
return node;
}
@@ -13505,7 +13056,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
splat->expression = parse_target(parser, splat->expression, multiple, true);
}
- return (pm_node_t *) splat;
+ return UP(splat);
}
case PM_CALL_NODE: {
pm_call_node_t *call = (pm_call_node_t *) target;
@@ -13514,10 +13065,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
// target then this is either a method call or a local variable
// write.
if (
- (call->message_loc.start != NULL) &&
- (call->message_loc.end[-1] != '!') &&
- (call->message_loc.end[-1] != '?') &&
- (call->opening_loc.start == NULL) &&
+ (call->message_loc.length > 0) &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
+ (call->opening_loc.length == 0) &&
(call->arguments == NULL) &&
(call->block == NULL)
) {
@@ -13531,21 +13082,19 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
// When it was parsed in the prefix position, foo was seen as a
// method call with no receiver and no arguments. Now we have an
// =, so we know it's a local variable write.
- const pm_location_t message_loc = call->message_loc;
-
- pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
- pm_node_destroy(parser, target);
+ pm_location_t message_loc = call->message_loc;
+ pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
- return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
+ return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
}
- if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
+ if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
}
parse_write_name(parser, &call->name);
- return (pm_node_t *) pm_call_target_node_create(parser, call);
+ return UP(pm_call_target_node_create(parser, call));
}
}
@@ -13553,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
// an aref expression, and we can transform it into an aset
// expression.
if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
- return (pm_node_t *) pm_index_target_node_create(parser, call);
+ return UP(pm_index_target_node_create(parser, call));
}
}
PRISM_FALLTHROUGH
@@ -13596,7 +13145,7 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
- return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
+ return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
}
return write;
@@ -13608,16 +13157,14 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
static pm_node_t *
parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
switch (PM_NODE_TYPE(target)) {
- case PM_MISSING_NODE:
- pm_node_destroy(parser, value);
+ case PM_ERROR_RECOVERY_NODE:
return target;
case PM_CLASS_VARIABLE_READ_NODE: {
pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
- pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_CONSTANT_PATH_NODE: {
- pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
+ pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
if (context_def_p(parser)) {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
@@ -13626,13 +13173,12 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
return parse_shareable_constant_write(parser, node);
}
case PM_CONSTANT_READ_NODE: {
- pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
+ pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
if (context_def_p(parser)) {
pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
}
- pm_node_destroy(parser, target);
return parse_shareable_constant_write(parser, node);
}
case PM_BACK_REFERENCE_READ_NODE:
@@ -13641,45 +13187,40 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
PRISM_FALLTHROUGH
case PM_GLOBAL_VARIABLE_READ_NODE: {
pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
- pm_node_destroy(parser, target);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_LOCAL_VARIABLE_READ_NODE: {
pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
+ pm_location_t location = target->location;
pm_constant_id_t name = local_read->name;
- pm_location_t name_loc = target->location;
-
uint32_t depth = local_read->depth;
pm_scope_t *scope = pm_parser_scope_find(parser, depth);
- if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
- PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
- parse_target_implicit_parameter(parser, target);
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
+ pm_node_unreference(parser, target);
}
pm_locals_unread(&scope->locals, name);
- pm_node_destroy(parser, target);
- return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
+ return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
- pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
+ pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
- parse_target_implicit_parameter(parser, target);
- pm_node_destroy(parser, target);
+ pm_node_unreference(parser, target);
return node;
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
- pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
- pm_node_destroy(parser, target);
+ pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
return write_node;
}
case PM_MULTI_TARGET_NODE:
- return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
+ return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
case PM_SPLAT_NODE: {
pm_splat_node_t *splat = (pm_splat_node_t *) target;
@@ -13688,9 +13229,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
}
pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
+ pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
- return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
+ return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
}
case PM_CALL_NODE: {
pm_call_node_t *call = (pm_call_node_t *) target;
@@ -13699,10 +13240,10 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
// target then this is either a method call or a local variable
// write.
if (
- (call->message_loc.start != NULL) &&
- (call->message_loc.end[-1] != '!') &&
- (call->message_loc.end[-1] != '?') &&
- (call->opening_loc.start == NULL) &&
+ (call->message_loc.length > 0) &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
+ (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
+ (call->opening_loc.length == 0) &&
(call->arguments == NULL) &&
(call->block == NULL)
) {
@@ -13716,19 +13257,18 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
// When it was parsed in the prefix position, foo was seen as a
// method call with no receiver and no arguments. Now we have an
// =, so we know it's a local variable write.
- const pm_location_t message = call->message_loc;
+ pm_location_t message_loc = call->message_loc;
- pm_parser_local_add_location(parser, message.start, message.end, 0);
- pm_node_destroy(parser, target);
+ pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
+ pm_parser_local_add_location(parser, &message_loc, 0);
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
- target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
+ pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
+ target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
- pm_refute_numbered_parameter(parser, message.start, message.end);
return target;
}
- if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) {
+ if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
// When we get here, we have a method call, because it was
// previously marked as a method call but now we have an =. This
// looks like:
@@ -13742,13 +13282,14 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
call->arguments = arguments;
- pm_arguments_node_arguments_append(arguments, value);
- call->base.location.end = arguments->base.location.end;
+ pm_arguments_node_arguments_append(parser->arena, arguments, value);
+ PM_NODE_LENGTH_SET_NODE(call, arguments);
+ call->equal_loc = TOK2LOC(parser, operator);
parse_write_name(parser, &call->name);
- pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+ pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
- return (pm_node_t *) call;
+ return UP(call);
}
}
@@ -13760,25 +13301,31 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
call->arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(call->arguments, value);
- target->location.end = value->location.end;
+ pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
+ PM_NODE_LENGTH_SET_NODE(target, value);
// Replace the name with "[]=".
call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
+ call->equal_loc = TOK2LOC(parser, operator);
// Ensure that the arguments for []= don't contain keywords
pm_index_arguments_check(parser, call->arguments, call->block);
- pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
+ pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
return target;
}
- // If there are arguments on the call node, then it can't be a method
- // call ending with = or a local variable write, so it must be a
- // syntax error. In this case we'll fall through to our default
+ // If there are arguments on the call node, then it can't be a
+ // method call ending with = or a local variable write, so it must
+ // be a syntax error. In this case we'll fall through to our default
// handling. We need to free the value that we parsed because there
// is no way for us to attach it to the tree at this point.
- pm_node_destroy(parser, value);
+ //
+ // Since it is possible for the value to contain an implicit
+ // parameter or a block exit (break/next/redo) somewhere in its
+ // subtree, we need to walk it and remove any such nodes from the
+ // lists that the parser keeps for the current scope.
+ pm_node_unreference(parser, value);
}
PRISM_FALLTHROUGH
default:
@@ -13809,11 +13356,10 @@ parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t
default: break;
}
- pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
+ pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
- pm_node_destroy(parser, target);
- return (pm_node_t *) result;
+ return UP(result);
}
/**
@@ -13846,35 +13392,35 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
name = parse_target(parser, name, true, true);
}
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+ pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
pm_multi_target_node_targets_append(parser, result, splat);
has_rest = true;
} else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
context_push(parser, PM_CONTEXT_MULTI_TARGET);
- pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
target = parse_target(parser, target, true, false);
pm_multi_target_node_targets_append(parser, result, target);
context_pop(parser);
} else if (token_begins_expression_p(parser->current.type)) {
- pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
target = parse_target(parser, target, true, false);
pm_multi_target_node_targets_append(parser, result, target);
} else if (!match1(parser, PM_TOKEN_EOF)) {
// If we get here, then we have a trailing , in a multi target node.
// We'll add an implicit rest node to represent this.
- pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_multi_target_node_targets_append(parser, result, rest);
break;
}
}
- return (pm_node_t *) result;
+ return UP(result);
}
/**
@@ -13884,7 +13430,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
static pm_node_t *
parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
- accept1(parser, PM_TOKEN_NEWLINE);
+
+ // If we're inside parentheses, then we allow a newline before the
+ // closing parenthesis or equals sign. Outside of parentheses, a newline
+ // is not allowed (e.g., `a, b\n= 1, 2` is not valid).
+ if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ }
// Ensure that we have either an = or a ) after the targets.
if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
@@ -13913,7 +13465,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
context_push(parser, context);
while (true) {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
pm_statements_node_body_append(parser, statements, node, true);
// If we're recovering from a syntax error, then we need to stop parsing
@@ -13953,7 +13505,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
// we were unable to parse an expression, then we will skip past this
// token and continue parsing the statements list. Otherwise we'll add
// an error and continue parsing the statements list.
- if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
+ if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) {
parser_lex(parser);
// If we are at the end of the file, then we need to stop parsing
@@ -13971,13 +13523,14 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
// This is an inlined version of accept1 because the error that we
// want to add has varargs. If this happens again, we should
// probably extract a helper function.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
}
context_pop(parser);
+
bool last_value = true;
switch (context) {
case PM_CONTEXT_BEGIN_ENSURE:
@@ -13998,23 +13551,24 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
*/
static void
pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
- const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
+ const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
if (duplicated != NULL) {
pm_buffer_t buffer = { 0 };
- pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
+ pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
pm_diagnostic_list_append_format(
+ &parser->metadata_arena,
&parser->warning_list,
duplicated->location.start,
- duplicated->location.end,
+ duplicated->location.length,
PM_WARN_DUPLICATED_HASH_KEY,
(int) pm_buffer_length(&buffer),
pm_buffer_value(&buffer),
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+ pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
);
- pm_buffer_free(&buffer);
+ pm_buffer_cleanup(&buffer);
}
}
@@ -14026,14 +13580,15 @@ static void
pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
pm_node_t *previous;
- if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
+ if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
pm_diagnostic_list_append_format(
+ &parser->metadata_arena,
&parser->warning_list,
- node->location.start,
- node->location.end,
+ PM_NODE_START(node),
+ PM_NODE_LENGTH(node),
PM_WARN_DUPLICATED_WHEN_CLAUSE,
- pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line,
- pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line
+ pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
+ pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
);
}
}
@@ -14061,14 +13616,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
// inner hash to share the static literals with the outer
// hash.
parser->current_hash_keys = literals;
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
} else if (token_begins_expression_p(parser->current.type)) {
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
} else {
pm_parser_scope_forwarding_keywords_check(parser, &operator);
}
- element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+ element = UP(pm_assoc_splat_node_create(parser, value, &operator));
contains_keyword_splat = true;
break;
}
@@ -14076,44 +13631,43 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
pm_token_t label = parser->current;
parser_lex(parser);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
+ pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
pm_hash_key_static_literals_add(parser, literals, key);
- pm_token_t operator = not_provided(parser);
pm_node_t *value = NULL;
if (token_begins_expression_p(parser->current.type)) {
- value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
+ value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
} else {
if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
- value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+ value = UP(pm_constant_read_node_create(parser, &constant));
} else {
int depth = -1;
pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
} else {
depth = pm_parser_local_depth(parser, &identifier);
}
if (depth == -1) {
- value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
+ value = UP(pm_call_node_variable_call_create(parser, &identifier));
} else {
- value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
+ value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
}
}
- value->location.end++;
- value = (pm_node_t *) pm_implicit_node_create(parser, value);
+ value->location.length++;
+ value = UP(pm_implicit_node_create(parser, value));
}
- element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ element = UP(pm_assoc_node_create(parser, key, NULL, value));
break;
}
default: {
- pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
+ pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
// Hash keys that are strings are automatically frozen. We will
// mark that here.
@@ -14123,24 +13677,22 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
pm_hash_key_static_literals_add(parser, literals, key);
- pm_token_t operator;
- if (pm_symbol_node_label_p(key)) {
- operator = not_provided(parser);
- } else {
+ pm_token_t operator = { 0 };
+ if (!pm_symbol_node_label_p(parser, key)) {
expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
operator = parser->previous;
}
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
- element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
break;
}
}
if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
- pm_hash_node_elements_append((pm_hash_node_t *) node, element);
+ pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
} else {
- pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
+ pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
}
// If there's no comma after the element, then we're done.
@@ -14161,23 +13713,47 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod
return contains_keyword_splat;
}
+static PRISM_INLINE bool
+argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
+ if (pm_symbol_node_label_p(parser, argument)) {
+ return true;
+ }
+
+ switch (PM_NODE_TYPE(argument)) {
+ case PM_CALL_NODE: {
+ pm_call_node_t *cast = (pm_call_node_t *) argument;
+ if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
+ if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
+ return false;
+ }
+ if (cast->block != NULL) {
+ return false;
+ }
+ }
+ break;
+ }
+ default: break;
+ }
+ return accept1(parser, PM_TOKEN_EQUAL_GREATER);
+}
+
/**
* Append an argument to a list of arguments.
*/
-static inline void
+static PRISM_INLINE void
parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
if (arguments->arguments == NULL) {
arguments->arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(arguments->arguments, argument);
+ pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
}
/**
* Parse a list of arguments.
*/
static void
-parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
+parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
// First we need to check if the next token is one that could be the start
@@ -14210,16 +13786,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
}
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
- argument = (pm_node_t *) hash;
+ argument = UP(hash);
pm_static_literals_t hash_keys = { 0 };
- bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
+ bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
parse_arguments_append(parser, arguments, argument);
- pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
- if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
- pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+ pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set(UP(arguments->arguments), node_flags);
pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
@@ -14232,12 +13808,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_node_t *expression = NULL;
if (token_begins_expression_p(parser->current.type)) {
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
} else {
pm_parser_scope_forwarding_block_check(parser, &operator);
}
- argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
+ argument = UP(pm_block_argument_node_create(parser, &operator, expression));
if (parsed_block_argument) {
parse_arguments_append(parser, arguments, argument);
} else {
@@ -14257,18 +13833,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
pm_parser_scope_forwarding_positionals_check(parser, &operator);
- argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
+ argument = UP(pm_splat_node_create(parser, &operator, NULL));
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
}
} else {
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
if (parsed_bare_hash) {
- pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
}
- argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ argument = UP(pm_splat_node_create(parser, &operator, expression));
}
parse_arguments_append(parser, arguments, argument);
@@ -14283,26 +13859,26 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
// not actually argument forwarding but was instead a
// range.
pm_token_t operator = parser->previous;
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// If we parse a range, we need to validate that we
// didn't accidentally violate the nonassoc rules of the
// ... operator.
if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
pm_range_node_t *range = (pm_range_node_t *) right;
- pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
+ pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
}
- argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ argument = UP(pm_range_node_create(parser, NULL, &operator, right));
} else {
pm_parser_scope_forwarding_all_check(parser, &parser->previous);
if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
}
- argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
+ argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
parse_arguments_append(parser, arguments, argument);
- pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
+ pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
arguments->has_forwarding = true;
parsed_forwarding_arguments = true;
break;
@@ -14312,22 +13888,20 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
PRISM_FALLTHROUGH
default: {
if (argument == NULL) {
- argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
}
bool contains_keywords = false;
bool contains_keyword_splat = false;
- if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+ if (argument_allowed_for_bare_hash(parser, argument)) {
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
}
- pm_token_t operator;
+ pm_token_t operator = { 0 };
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
operator = parser->previous;
- } else {
- operator = not_provided(parser);
}
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
@@ -14338,18 +13912,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_hash_key_static_literals_add(parser, &hash_keys, argument);
// Finish parsing the one we are part way through.
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
- argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
- pm_keyword_hash_node_elements_append(bare_hash, argument);
- argument = (pm_node_t *) bare_hash;
+ pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
+ argument = UP(bare_hash);
// Then parse more if we have a comma
if (accept1(parser, PM_TOKEN_COMMA) && (
token_begins_expression_p(parser->current.type) ||
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
)) {
- contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
+ contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
}
pm_static_literals_free(&hash_keys);
@@ -14358,10 +13932,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
parse_arguments_append(parser, arguments, argument);
- pm_node_flags_t flags = 0;
- if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
- if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
- pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
+ pm_node_flags_t node_flags = 0;
+ if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
+ if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
+ pm_node_flag_set(UP(arguments->arguments), node_flags);
break;
}
@@ -14370,7 +13944,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
parsed_first_argument = true;
// If parsing the argument failed, we need to stop parsing arguments.
- if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
+ if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break;
// If the terminator of these arguments is not EOF, then we have a
// specific token we're looking for. In that case we can accept a
@@ -14390,6 +13964,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
if (accepted_newline) {
pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
}
+
+ // If this is a command call and an argument takes a block,
+ // there can be no further arguments. For example,
+ // `foo(bar 1 do end, 2)` should be rejected.
+ if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
+ pm_call_node_t *call = (pm_call_node_t *) argument;
+ if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
+ pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
+ break;
+ }
+ }
} else {
// If there is no comma at the end of the argument list then we're
// done parsing arguments and can break out of this loop.
@@ -14417,7 +14002,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
- pm_multi_target_node_opening_set(node, &parser->previous);
+ pm_multi_target_node_opening_set(parser, node, &parser->previous);
do {
pm_node_t *param;
@@ -14427,33 +14012,33 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
// commas, so here we'll assume this is a mistake of the user not
// knowing it's not allowed here.
if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
+ param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
pm_multi_target_node_targets_append(parser, node, param);
pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
break;
}
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
- param = (pm_node_t *) parse_required_destructured_parameter(parser);
+ param = UP(parse_required_destructured_parameter(parser));
} else if (accept1(parser, PM_TOKEN_USTAR)) {
pm_token_t star = parser->previous;
pm_node_t *value = NULL;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
pm_token_t name = parser->previous;
- value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+ value = UP(pm_required_parameter_node_create(parser, &name));
if (pm_parser_parameter_name_check(parser, &name)) {
pm_node_flag_set_repeated_parameter(value);
}
pm_parser_local_add_token(parser, &name, 1);
}
- param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
+ param = UP(pm_splat_node_create(parser, &star, value));
} else {
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
pm_token_t name = parser->previous;
- param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+ param = UP(pm_required_parameter_node_create(parser, &name));
if (pm_parser_parameter_name_check(parser, &name)) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -14465,7 +14050,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
- pm_multi_target_node_closing_set(node, &parser->previous);
+ pm_multi_target_node_closing_set(parser, node, &parser->previous);
return node;
}
@@ -14541,6 +14126,43 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
return true;
}
+static PRISM_INLINE void
+parse_parameters_handle_trailing_comma(
+ pm_parser_t *parser,
+ pm_parameters_node_t *params,
+ pm_parameters_order_t order,
+ bool in_block,
+ bool allows_trailing_comma
+) {
+ if (!allows_trailing_comma) {
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+ return;
+ }
+
+ if (in_block) {
+ if (order >= PM_PARAMETERS_ORDER_NAMED) {
+ // foo do |bar,|; end
+ pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+
+ if (params->rest == NULL) {
+ pm_parameters_node_rest_set(params, param);
+ } else {
+ pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
+ pm_parameters_node_posts_append(parser->arena, params, UP(param));
+ }
+ } else {
+ // foo do |*bar,|; end
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+ }
+ } else {
+ // https://bugs.ruby-lang.org/issues/19107
+ // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
+ pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+ }
+ }
+}
+
/**
* Parse a list of parameters on a method definition.
*/
@@ -14553,6 +14175,7 @@ parse_parameters(
bool allows_forwarding_parameters,
bool accepts_blocks_in_defaults,
bool in_block,
+ pm_diagnostic_id_t diag_id_forwarding,
uint16_t depth
) {
pm_do_loop_stack_push(parser, false);
@@ -14566,12 +14189,12 @@ parse_parameters(
switch (parser->current.type) {
case PM_TOKEN_PARENTHESIS_LEFT: {
update_parameter_state(parser, &parser->current, &order);
- pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
+ pm_node_t *param = UP(parse_required_destructured_parameter(parser));
if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
- pm_parameters_node_requireds_append(params, param);
+ pm_parameters_node_requireds_append(parser->arena, params, param);
} else {
- pm_parameters_node_posts_append(params, param);
+ pm_parameters_node_posts_append(parser->arena, params, param);
}
break;
}
@@ -14581,34 +14204,40 @@ parse_parameters(
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_token_t name;
+ pm_node_t *param;
- bool repeated = false;
- if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
- name = parser->previous;
- repeated = pm_parser_parameter_name_check(parser, &name);
- pm_parser_local_add_token(parser, &name, 1);
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
+ param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
} else {
- name = not_provided(parser);
- parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
- }
+ pm_token_t name = {0};
- pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
- if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ bool repeated = false;
+ if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
+ name = parser->previous;
+ repeated = pm_parser_parameter_name_check(parser, &name);
+ pm_parser_local_add_token(parser, &name, 1);
+ } else {
+ parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
+ }
+
+ param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
+ if (repeated) {
+ pm_node_flag_set_repeated_parameter(param);
+ }
}
+
if (params->block == NULL) {
pm_parameters_node_block_set(params, param);
} else {
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
+ pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
}
break;
}
case PM_TOKEN_UDOT_DOT_DOT: {
if (!allows_forwarding_parameters) {
- pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+ pm_parser_err_current(parser, diag_id_forwarding);
}
bool succeeded = update_parameter_state(parser, &parser->current, &order);
@@ -14621,12 +14250,12 @@ parse_parameters(
// If we already have a keyword rest parameter, then we replace it with the
// forwarding parameter and move the keyword rest parameter to the posts list.
pm_node_t *keyword_rest = params->keyword_rest;
- pm_parameters_node_posts_append(params, keyword_rest);
+ pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest)));
if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
params->keyword_rest = NULL;
}
- pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
+ pm_parameters_node_keyword_rest_set(params, UP(param));
break;
}
case PM_TOKEN_CLASS_VARIABLE:
@@ -14671,24 +14300,24 @@ parse_parameters(
parser_lex(parser);
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
- uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
- pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *) param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_optionals_append(params, param);
+ pm_parameters_node_optionals_append(parser->arena, params, param);
// If the value of the parameter increased the number of
// reads of that parameter, then we need to warn that we
// have a circular definition.
- if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
+ if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
}
context_pop(parser);
@@ -14703,15 +14332,15 @@ parse_parameters(
} else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_requireds_append(params, (pm_node_t *) param);
+ pm_parameters_node_requireds_append(parser->arena, params, UP(param));
} else {
pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
if (repeated) {
- pm_node_flag_set_repeated_parameter((pm_node_t *)param);
+ pm_node_flag_set_repeated_parameter(UP(param));
}
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
+ pm_parameters_node_posts_append(parser->arena, params, UP(param));
}
break;
@@ -14728,9 +14357,9 @@ parse_parameters(
local.end -= 1;
if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
- pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+ pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
} else if (local.end[-1] == '!' || local.end[-1] == '?') {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
}
bool repeated = pm_parser_parameter_name_check(parser, &local);
@@ -14742,12 +14371,12 @@ parse_parameters(
case PM_TOKEN_PIPE: {
context_pop(parser);
- pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
- pm_parameters_node_keywords_append(params, param);
+ pm_parameters_node_keywords_append(parser->arena, params, param);
break;
}
case PM_TOKEN_SEMICOLON:
@@ -14759,12 +14388,12 @@ parse_parameters(
break;
}
- pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
- pm_parameters_node_keywords_append(params, param);
+ pm_parameters_node_keywords_append(parser->arena, params, param);
break;
}
default: {
@@ -14772,20 +14401,20 @@ parse_parameters(
if (token_begins_expression_p(parser->current.type)) {
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
- uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
+ uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
- pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
- if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
+ if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
}
- param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
+ param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
}
else {
- param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
+ param = UP(pm_required_keyword_parameter_node_create(parser, &name));
}
if (repeated) {
@@ -14793,7 +14422,7 @@ parse_parameters(
}
context_pop(parser);
- pm_parameters_node_keywords_append(params, param);
+ pm_parameters_node_keywords_append(parser->arena, params, param);
// If parsing the value of the parameter resulted in error recovery,
// then we can put a missing node in its place and stop parsing the
@@ -14814,7 +14443,7 @@ parse_parameters(
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_token_t name;
+ pm_token_t name = { 0 };
bool repeated = false;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -14822,11 +14451,10 @@ parse_parameters(
repeated = pm_parser_parameter_name_check(parser, &name);
pm_parser_local_add_token(parser, &name, 1);
} else {
- name = not_provided(parser);
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
}
- pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
+ pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -14835,7 +14463,7 @@ parse_parameters(
pm_parameters_node_rest_set(params, param);
} else {
pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, param);
+ pm_parameters_node_posts_append(parser->arena, params, param);
}
break;
@@ -14854,9 +14482,9 @@ parse_parameters(
pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
}
- param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+ param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
} else {
- pm_token_t name;
+ pm_token_t name = { 0 };
bool repeated = false;
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -14864,11 +14492,10 @@ parse_parameters(
repeated = pm_parser_parameter_name_check(parser, &name);
pm_parser_local_add_token(parser, &name, 1);
} else {
- name = not_provided(parser);
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
}
- param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
+ param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
if (repeated) {
pm_node_flag_set_repeated_parameter(param);
}
@@ -14878,27 +14505,14 @@ parse_parameters(
pm_parameters_node_keyword_rest_set(params, param);
} else {
pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, param);
+ pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
}
break;
}
default:
if (parser->previous.type == PM_TOKEN_COMMA) {
- if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
- // If we get here, then we have a trailing comma in a
- // block parameter list.
- pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
-
- if (params->rest == NULL) {
- pm_parameters_node_rest_set(params, param);
- } else {
- pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
- pm_parameters_node_posts_append(params, (pm_node_t *) param);
- }
- } else {
- pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
- }
+ parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
}
parsing = false;
@@ -14930,8 +14544,7 @@ parse_parameters(
pm_do_loop_stack_pop(parser);
// If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
- if (params->base.location.start == params->base.location.end) {
- pm_node_destroy(parser, (pm_node_t *) params);
+ if (PM_NODE_START(params) == PM_NODE_END(params)) {
return NULL;
}
@@ -14948,13 +14561,13 @@ token_newline_index(const pm_parser_t *parser) {
// This is the common case. In this case we can look at the previously
// recorded newline in the newline list and subtract from the current
// offset.
- return parser->newline_list.size - 1;
+ return parser->line_offsets.size - 1;
} else {
// This is unlikely. This is the case that we have already parsed the
// start of a heredoc, so we cannot rely on looking at the previous
// offset of the newline list, and instead must go through the whole
// process of a binary search for the line number.
- return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
+ return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
}
}
@@ -14964,7 +14577,7 @@ token_newline_index(const pm_parser_t *parser) {
*/
static int64_t
token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
- const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
+ const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
const uint8_t *end = token->start;
// Skip over the BOM if it is present.
@@ -15028,8 +14641,8 @@ parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_ind
// Otherwise, add a warning.
PM_PARSER_WARN_FORMAT(
parser,
- closing_token->start,
- closing_token->end,
+ PM_TOKEN_START(parser, closing_token),
+ PM_TOKEN_LENGTH(closing_token),
PM_WARN_INDENTATION_MISMATCH,
(int) (closing_token->end - closing_token->start),
(const char *) closing_token->start,
@@ -15053,7 +14666,7 @@ typedef enum {
* Parse any number of rescue clauses. This will form a linked list of if
* nodes pointing to each other from the top.
*/
-static inline void
+static PRISM_INLINE void
parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
pm_rescue_node_t *current = NULL;
@@ -15069,9 +14682,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// we're going to have an empty list of exceptions to rescue (which
// implies StandardError).
parser_lex(parser);
- pm_rescue_node_operator_set(rescue, &parser->previous);
+ pm_rescue_node_operator_set(parser, rescue, &parser->previous);
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
@@ -15090,7 +14703,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
do {
pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
- pm_rescue_node_exceptions_append(rescue, expression);
+ pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
// If we hit a newline, then this is the end of the rescue expression. We
// can continue on to parse the statements.
@@ -15099,9 +14712,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// If we hit a `=>` then we're going to parse the exception variable. Once
// we've done that, we'll break out of the loop and parse the statements.
if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
- pm_rescue_node_operator_set(rescue, &parser->previous);
+ pm_rescue_node_operator_set(parser, rescue, &parser->previous);
- pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
+ pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
reference = parse_target(parser, reference, false, false);
pm_rescue_node_reference_set(rescue, reference);
@@ -15114,11 +14727,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
- rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+ rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
}
} else {
expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
- rescue->then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(&parser->previous);
+ rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
}
if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
@@ -15156,11 +14769,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// since we won't know the end until we've found all subsequent
// clauses. This sets the end location on all rescues once we know it.
if (current != NULL) {
- const uint8_t *end_to_set = current->base.location.end;
pm_rescue_node_t *clause = parent_node->rescue_clause;
while (clause != NULL) {
- clause->base.location.end = end_to_set;
+ PM_NODE_LENGTH_SET_NODE(clause, current);
clause = clause->subsequent;
}
}
@@ -15203,7 +14815,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
// If we don't have a `current` rescue node, then this is a dangling
// else, and it's an error.
- if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
+ if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
}
if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
@@ -15241,10 +14853,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
if (match1(parser, PM_TOKEN_KEYWORD_END)) {
if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
- pm_begin_node_end_keyword_set(parent_node, &parser->current);
+ pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
} else {
- pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- pm_begin_node_end_keyword_set(parent_node, &end_keyword);
+ pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
+ pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
}
}
@@ -15254,11 +14866,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
*/
static pm_begin_node_t *
parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
- pm_token_t begin_keyword = not_provided(parser);
- pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
-
+ pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
- node->base.location.start = start;
+
+ node->base.location.start = U32(start - parser->start);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
return node;
}
@@ -15277,6 +14889,9 @@ parse_block_parameters(
) {
pm_parameters_node_t *parameters = NULL;
if (!match1(parser, PM_TOKEN_SEMICOLON)) {
+ if (!is_lambda_literal) {
+ context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
+ }
parameters = parse_parameters(
parser,
is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
@@ -15285,12 +14900,16 @@ parse_block_parameters(
false,
accepts_blocks_in_defaults,
true,
+ is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
(uint16_t) (depth + 1)
);
+ if (!is_lambda_literal) {
+ context_pop(parser);
+ }
}
pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
- if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
+ if (opening != NULL) {
accept1(parser, PM_TOKEN_NEWLINE);
if (accept1(parser, PM_TOKEN_SEMICOLON)) {
@@ -15321,9 +14940,9 @@ parse_block_parameters(
pm_parser_local_add_token(parser, &parser->previous, 1);
pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
- if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
+ if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
- pm_block_parameters_node_append_local(block_parameters, local);
+ pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
} while (accept1(parser, PM_TOKEN_COMMA));
}
}
@@ -15403,8 +15022,8 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
} else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
- } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
+ } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
} else {
assert(false && "unreachable");
}
@@ -15423,13 +15042,11 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_
for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
}
-
- const pm_location_t location = { .start = opening->start, .end = closing->end };
- return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
+ return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
}
if (it_parameter) {
- return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
+ return UP(pm_it_parameters_node_create(parser, opening, closing));
}
return NULL;
@@ -15461,7 +15078,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
}
- pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+ pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
}
accept1(parser, PM_TOKEN_NEWLINE);
@@ -15469,30 +15086,30 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
if (opening.type == PM_TOKEN_BRACE_LEFT) {
if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
+ statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
}
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
} else {
if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
+ statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
pm_accepts_block_stack_pop(parser);
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
+ statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
}
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
}
pm_constant_id_list_t locals;
pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
- pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
+ pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
pm_parser_scope_pop(parser);
pm_accepts_block_stack_pop(parser);
@@ -15506,42 +15123,54 @@ parse_block(pm_parser_t *parser, uint16_t depth) {
* arguments, or blocks).
*/
static bool
-parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
+parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
+ /* Fast path: if the current token can't begin an expression and isn't
+ * a parenthesis, block opener, or splat/block-pass operator, there are
+ * no arguments to parse. */
+ if (
+ !token_begins_expression_p(parser->current.type) &&
+ !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
+ ) {
+ return false;
+ }
+
bool found = false;
+ bool parsed_command_args = false;
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
found |= true;
- arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments->opening_loc = TOK2LOC(parser, &parser->previous);
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments->closing_loc = TOK2LOC(parser, &parser->previous);
} else {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
pm_accepts_block_stack_pop(parser);
- arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments->closing_loc = TOK2LOC(parser, &parser->previous);
}
- } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
+ } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
found |= true;
+ parsed_command_args = true;
pm_accepts_block_stack_push(parser, false);
// If we get here, then the subsequent token cannot be used as an infix
// operator. In this case we assume the subsequent token is part of an
// argument to this method call.
- parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+ parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
// If we are done with the arguments and the last token we consumed was a
// comma, then we have a trailing comma and we need to check whether it is
// allowed or not.
if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type));
}
pm_accepts_block_stack_pop(parser);
@@ -15560,21 +15189,24 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
} else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
found |= true;
block = parse_block(parser, (uint16_t) (depth + 1));
+ } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
+ found |= true;
+ block = parse_block(parser, (uint16_t) (depth + 1));
}
if (block != NULL) {
if (arguments->block == NULL && !arguments->has_forwarding) {
- arguments->block = (pm_node_t *) block;
+ arguments->block = UP(block);
} else {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
if (arguments->block != NULL) {
if (arguments->arguments == NULL) {
arguments->arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
+ pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
}
- arguments->block = (pm_node_t *) block;
+ arguments->block = UP(block);
}
}
}
@@ -15642,6 +15274,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
case PM_CONTEXT_BLOCK_ENSURE:
case PM_CONTEXT_BLOCK_KEYWORDS:
case PM_CONTEXT_BLOCK_RESCUE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_DEF_ELSE:
case PM_CONTEXT_DEF_ENSURE:
case PM_CONTEXT_DEF_PARAMS:
@@ -15661,7 +15294,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) {
break;
}
}
- if (in_sclass) {
+ if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
}
}
@@ -15678,6 +15311,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
case PM_CONTEXT_BLOCK_KEYWORDS:
case PM_CONTEXT_BLOCK_ELSE:
case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_BLOCK_RESCUE:
case PM_CONTEXT_DEFINED:
case PM_CONTEXT_FOR:
@@ -15687,12 +15321,19 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
case PM_CONTEXT_LAMBDA_ENSURE:
case PM_CONTEXT_LAMBDA_RESCUE:
case PM_CONTEXT_LOOP_PREDICATE:
- case PM_CONTEXT_POSTEXE:
case PM_CONTEXT_UNTIL:
case PM_CONTEXT_WHILE:
// These are the good cases. We're allowed to have a block exit
// in these contexts.
return;
+ case PM_CONTEXT_POSTEXE:
+ // https://bugs.ruby-lang.org/issues/20409
+ if (context_node->context == PM_CONTEXT_POSTEXE) {
+ if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
+ return;
+ }
+ }
+ PRISM_FALLTHROUGH
case PM_CONTEXT_DEF:
case PM_CONTEXT_DEF_PARAMS:
case PM_CONTEXT_DEF_ELSE:
@@ -15714,7 +15355,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
// block exit to the list of exits for the expression, and
// the node parsing will handle validating it instead.
assert(parser->current_block_exits != NULL);
- pm_node_list_append(parser->current_block_exits, node);
+ pm_node_list_append(parser->arena, parser->current_block_exits, node);
return;
case PM_CONTEXT_BEGIN_ELSE:
case PM_CONTEXT_BEGIN_ENSURE:
@@ -15805,7 +15446,7 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
// However, they could still become valid in a higher level context if
// there is another list above this one. In this case we'll push all of
// the block exits up to the previous list.
- pm_node_list_concat(previous_block_exits, parser->current_block_exits);
+ pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
parser->current_block_exits = previous_block_exits;
} else {
// If we did not match a trailing while/until and this was the last
@@ -15815,11 +15456,11 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
}
}
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
context_push(parser, PM_CONTEXT_PREDICATE);
pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
// Predicates are closed by a term, a "then", or a term and then a "then".
bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -15837,13 +15478,13 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
return predicate;
}
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
pm_node_list_t current_block_exits = { 0 };
pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
pm_token_t keyword = parser->previous;
- pm_token_t then_keyword = not_provided(parser);
+ pm_token_t then_keyword = { 0 };
pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
pm_statements_node_t *statements = NULL;
@@ -15855,15 +15496,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
}
- pm_token_t end_keyword = not_provided(parser);
pm_node_t *parent = NULL;
switch (context) {
case PM_CONTEXT_IF:
- parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+ parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
break;
case PM_CONTEXT_UNLESS:
- parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
+ parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
break;
default:
assert(false && "unreachable");
@@ -15877,7 +15517,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
if (context == PM_CONTEXT_IF) {
while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
if (parser_end_of_line_p(parser)) {
- PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
@@ -15891,7 +15531,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
pm_accepts_block_stack_pop(parser);
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
+ pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
((pm_if_node_t *) current)->subsequent = elsif;
current = elsif;
}
@@ -15910,13 +15550,13 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
switch (context) {
case PM_CONTEXT_IF:
- ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
+ ((pm_if_node_t *) current)->subsequent = UP(else_node);
break;
case PM_CONTEXT_UNLESS:
((pm_unless_node_t *) parent)->else_clause = else_node;
@@ -15927,7 +15567,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
}
} else {
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
}
// Set the appropriate end location for all of the nodes in the subtree.
@@ -15939,12 +15579,12 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
while (recursing) {
switch (PM_NODE_TYPE(current)) {
case PM_IF_NODE:
- pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
+ pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
current = ((pm_if_node_t *) current)->subsequent;
recursing = current != NULL;
break;
case PM_ELSE_NODE:
- pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
+ pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
recursing = false;
break;
default: {
@@ -15956,7 +15596,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
break;
}
case PM_CONTEXT_UNLESS:
- pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
+ pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
break;
default:
assert(false && "unreachable");
@@ -15964,8 +15604,6 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
}
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
return parent;
}
@@ -15976,7 +15614,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl
#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
- case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
+ case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
@@ -16039,7 +15677,7 @@ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int
* If the encoding was explicitly set through the lexing process, then we need
* to potentially mark the string's flags to indicate how to encode it.
*/
-static inline pm_node_flags_t
+static PRISM_INLINE pm_node_flags_t
parse_unescaped_encoding(const pm_parser_t *parser) {
if (parser->explicit_encoding != NULL) {
if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
@@ -16071,10 +15709,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
// "aaa #{bbb} #@ccc ddd"
// ^^^^ ^ ^^^^
case PM_TOKEN_STRING_CONTENT: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+ pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
parser_lex(parser);
@@ -16101,7 +15736,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = NULL;
- if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
+ if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
pm_accepts_block_stack_push(parser, true);
statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
@@ -16109,9 +15744,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
parser->brace_nesting = brace_nesting;
lex_state_set(parser, state);
-
expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
- pm_token_t closing = parser->previous;
// If this set of embedded statements only contains a single
// statement, then Ruby does not consider it as a possible statement
@@ -16120,7 +15753,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
}
- return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
+ return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
}
// Here the lexer has returned the beginning of an embedded variable.
@@ -16145,42 +15778,42 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
// create a global variable read node.
case PM_TOKEN_BACK_REFERENCE:
parser_lex(parser);
- variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
break;
// In this case an nth reference is being interpolated. We'll
// create a numbered reference read node.
case PM_TOKEN_NUMBERED_REFERENCE:
parser_lex(parser);
- variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
break;
// In this case a global variable is being interpolated. We'll
// create a global variable read node.
case PM_TOKEN_GLOBAL_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
break;
// In this case an instance variable is being interpolated.
// We'll create an instance variable read node.
case PM_TOKEN_INSTANCE_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
break;
// In this case a class variable is being interpolated. We'll
// create a class variable read node.
case PM_TOKEN_CLASS_VARIABLE:
parser_lex(parser);
- variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
break;
// We can hit here if we got an invalid token. In that case
// we'll not attempt to lex this token and instead just return a
// missing node.
default:
expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
- variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
break;
}
- return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
+ return UP(pm_embedded_variable_node_create(parser, &operator, variable));
}
default:
parser_lex(parser);
@@ -16208,18 +15841,16 @@ parse_operator_symbol_name(const pm_token_t *name) {
static pm_node_t *
parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
const uint8_t *end = parse_operator_symbol_name(&parser->current);
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
parser_lex(parser);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
- pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
+ pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
/**
@@ -16253,13 +15884,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
break;
}
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
if (lex_mode->as.string.interpolation) {
@@ -16267,10 +15896,13 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
if (match1(parser, PM_TOKEN_STRING_END)) {
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
parser_lex(parser);
+ pm_token_t content = {
+ .type = PM_TOKEN_STRING_CONTENT,
+ .start = parser->previous.start,
+ .end = parser->previous.start
+ };
- pm_token_t content = not_provided(parser);
- pm_token_t closing = parser->previous;
- return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
+ return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
}
// Now we can parse the first part of the symbol.
@@ -16282,15 +15914,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
- return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
+ return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
}
pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
- if (part) pm_interpolated_symbol_node_append(symbol, part);
+ if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_interpolated_symbol_node_append(symbol, part);
+ pm_interpolated_symbol_node_append(parser->arena, symbol, part);
}
}
@@ -16301,8 +15933,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
}
- pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
- return (pm_node_t *) symbol;
+ pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
+ return UP(symbol);
}
pm_token_t content;
@@ -16324,13 +15956,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
// interpolated string node, so that's what we'll do here.
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
- pm_token_t bounds = not_provided(parser);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
+ pm_interpolated_symbol_node_append(parser->arena, symbol, part);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
- pm_interpolated_symbol_node_append(symbol, part);
-
- part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
- pm_interpolated_symbol_node_append(symbol, part);
+ part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
+ pm_interpolated_symbol_node_append(parser->arena, symbol, part);
if (next_state != PM_LEX_STATE_NONE) {
lex_state_set(parser, next_state);
@@ -16339,8 +15969,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
parser_lex(parser);
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
- pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
- return (pm_node_t *) symbol;
+ pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
+ return UP(symbol);
}
} else {
content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
@@ -16357,34 +15987,29 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
}
- return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
+ return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
}
/**
* Parse an argument to undef which can either be a bare word, a symbol, a
* constant, or an interpolated symbol.
*/
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
switch (parser->current.type) {
- case PM_CASE_OPERATOR: {
- const pm_token_t opening = not_provided(parser);
- return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
- }
+ case PM_CASE_OPERATOR:
+ return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
case PM_CASE_KEYWORD:
case PM_TOKEN_CONSTANT:
case PM_TOKEN_IDENTIFIER:
case PM_TOKEN_METHOD_NAME: {
parser_lex(parser);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -16394,7 +16019,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
}
default:
pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
}
}
@@ -16404,13 +16029,11 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
* we need to set the lex state to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM
* between the first and second arguments.
*/
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
switch (parser->current.type) {
- case PM_CASE_OPERATOR: {
- const pm_token_t opening = not_provided(parser);
- return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
- }
+ case PM_CASE_OPERATOR:
+ return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
case PM_CASE_KEYWORD:
case PM_TOKEN_CONSTANT:
case PM_TOKEN_IDENTIFIER:
@@ -16418,14 +16041,11 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
parser_lex(parser);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
- pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
+ pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
- return (pm_node_t *) symbol;
+ return UP(symbol);
}
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -16435,16 +16055,16 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
}
case PM_TOKEN_BACK_REFERENCE:
parser_lex(parser);
- return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ return UP(pm_back_reference_read_node_create(parser, &parser->previous));
case PM_TOKEN_NUMBERED_REFERENCE:
parser_lex(parser);
- return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
case PM_TOKEN_GLOBAL_VARIABLE:
parser_lex(parser);
- return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ return UP(pm_global_variable_read_node_create(parser, &parser->previous));
default:
pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
}
}
@@ -16456,10 +16076,10 @@ static pm_node_t *
parse_variable(pm_parser_t *parser) {
pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
int depth;
- bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
- return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
+ return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
}
pm_scope_t *current_scope = parser->current_scope;
@@ -16478,13 +16098,13 @@ parse_variable(pm_parser_t *parser) {
parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
}
- pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
- pm_node_list_append(&current_scope->implicit_parameters, node);
+ pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
+ pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
return node;
- } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
- pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
- pm_node_list_append(&current_scope->implicit_parameters, node);
+ } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
+ pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
+ pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
return node;
}
@@ -16507,9 +16127,9 @@ parse_variable_call(pm_parser_t *parser) {
}
pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
- pm_node_flag_set((pm_node_t *)node, flags);
+ pm_node_flag_set(UP(node), flags);
- return (pm_node_t *) node;
+ return UP(node);
}
/**
@@ -16517,7 +16137,7 @@ parse_variable_call(pm_parser_t *parser) {
* parser. If it does not match a valid method definition name, then a missing
* token is returned.
*/
-static inline pm_token_t
+static PRISM_INLINE pm_token_t
parse_method_definition_name(pm_parser_t *parser) {
switch (parser->current.type) {
case PM_CASE_KEYWORD:
@@ -16526,7 +16146,7 @@ parse_method_definition_name(pm_parser_t *parser) {
parser_lex(parser);
return parser->previous;
case PM_TOKEN_IDENTIFIER:
- pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
+ pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
parser_lex(parser);
return parser->previous;
case PM_CASE_OPERATOR:
@@ -16534,22 +16154,31 @@ parse_method_definition_name(pm_parser_t *parser) {
parser_lex(parser);
return parser->previous;
default:
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
- return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type));
+ return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
}
}
static void
-parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
- // Get a reference to the string struct that is being held by the string
- // node. This is the value we're going to actually manipulate.
- pm_string_ensure_owned(string);
+parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
+ // Make a writable copy in the arena if the string isn't already writable.
+ // We keep a mutable pointer to the arena memory so we can memmove into it
+ // below without casting away const from the string's source field.
+ uint8_t *writable;
+
+ if (string->type != PM_STRING_OWNED) {
+ size_t length = pm_string_length(string);
+ writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
+ pm_string_constant_init(string, (const char *) writable, length);
+ } else {
+ writable = (uint8_t *) string->source;
+ }
// Now get the bounds of the existing string. We'll use this as a
// destination to move bytes into. We'll also use it for bounds checking
// since we don't require that these strings be null terminated.
size_t dest_length = pm_string_length(string);
- const uint8_t *source_cursor = (uint8_t *) string->source;
+ const uint8_t *source_cursor = writable;
const uint8_t *source_end = source_cursor + dest_length;
// We're going to move bytes backward in the string when we get leading
@@ -16573,11 +16202,24 @@ parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
dest_length--;
}
- memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
+ memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
string->length = dest_length;
}
/**
+ * If we end up trimming all of the whitespace from a node and it isn't
+ * part of a line continuation, then we'll drop it from the list entirely.
+ */
+static PRISM_INLINE bool
+heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
+ if (string_node->unescaped.length == 0) {
+ const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
+ return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
+ }
+ return false;
+}
+
+/**
* Take a heredoc node that is indented by a ~ and trim the leading whitespace.
*/
static void
@@ -16587,8 +16229,7 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
bool dedent_next = true;
// Iterate over all nodes, and trim whitespace accordingly. We're going to
- // keep around two indices: a read and a write. If we end up trimming all of
- // the whitespace from a node, then we'll drop it from the list entirely.
+ // keep around two indices: a read and a write.
size_t write_index = 0;
pm_node_t *node;
@@ -16604,11 +16245,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
pm_string_node_t *string_node = ((pm_string_node_t *) node);
if (dedent_next) {
- parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
+ parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
}
- if (string_node->unescaped.length == 0) {
- pm_node_destroy(parser, node);
+ if (heredoc_dedent_discard_string_node(parser, string_node)) {
} else {
nodes->nodes[write_index++] = node;
}
@@ -16631,7 +16271,7 @@ parse_strings_empty_content(const uint8_t *location) {
/**
* Parse a set of strings that could be concatenated together.
*/
-static inline pm_node_t *
+static PRISM_INLINE pm_node_t *
parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
bool concating = false;
@@ -16658,16 +16298,14 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
pm_string_shared_init(&string->unescaped, content.start, content.end);
- node = (pm_node_t *) string;
+ node = UP(string);
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
// If we get here, then we have an end of a label immediately
// after a start. In that case we'll create an empty symbol
// node.
- pm_token_t content = parse_strings_empty_content(parser->previous.start);
- pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
-
- pm_string_shared_init(&symbol->unescaped, content.start, content.end);
- node = (pm_node_t *) symbol;
+ pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
+ pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
+ node = UP(symbol);
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (!lex_interpolation) {
@@ -16678,7 +16316,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
if (match1(parser, PM_TOKEN_EOF)) {
unescaped = PM_STRING_EMPTY;
- content = not_provided(parser);
+ content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
} else {
unescaped = parser->current_string;
expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
@@ -16698,34 +16336,30 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
// be able to contain all of the parts.
if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
pm_node_list_t parts = { 0 };
-
- pm_token_t delimiters = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
- pm_node_list_append(&parts, part);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
+ pm_node_list_append(parser->arena, &parts, part);
do {
- part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
- pm_node_list_append(&parts, part);
+ part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ pm_node_list_append(parser->arena, &parts, part);
parser_lex(parser);
} while (match1(parser, PM_TOKEN_STRING_CONTENT));
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
-
- pm_node_list_free(&parts);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+ node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
} else if (accept1(parser, PM_TOKEN_STRING_END)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
} else {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ parser->previous.type = 0;
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
}
} else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string
@@ -16737,7 +16371,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
parser_lex(parser);
if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
// Kind of odd behavior, but basically if we have an
@@ -16747,43 +16381,38 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
if (!accept1(parser, PM_TOKEN_STRING_END)) {
const uint8_t *location = parser->previous.end;
if (location > parser->start && location[-1] == '\n') location--;
- pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
+ pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
} else if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
+ node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else {
// If we get here, then we have interpolation so we'll need
// to create a string or symbol node with interpolation.
pm_node_list_t parts = { 0 };
- pm_token_t string_opening = not_provided(parser);
- pm_token_t string_closing = not_provided(parser);
-
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
pm_node_flag_set(part, parse_unescaped_encoding(parser));
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
}
}
if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
}
-
- pm_node_list_free(&parts);
}
} else {
// If we get here, then the first part of the string is not plain
@@ -16794,22 +16423,20 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
}
}
if (accept1(parser, PM_TOKEN_LABEL_END)) {
- node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
} else if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
- node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
+ node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
}
-
- pm_node_list_free(&parts);
}
if (current == NULL) {
@@ -16839,14 +16466,12 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1
}
concating = true;
- pm_token_t bounds = not_provided(parser);
-
- pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
- pm_interpolated_string_node_append(container, current);
- current = (pm_node_t *) container;
+ pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, container, current);
+ current = UP(container);
}
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
}
}
@@ -16868,12 +16493,12 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
static void
parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
// Skip this capture if it starts with an underscore.
- if (*location->start == '_') return;
+ if (peek_at(parser, parser->start + location->start) == '_') return;
if (pm_constant_id_list_includes(captures, capture)) {
- pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
+ pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
} else {
- pm_constant_id_list_append(captures, capture);
+ pm_constant_id_list_append(parser->arena, captures, capture);
}
}
@@ -16887,7 +16512,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
while (accept1(parser, PM_TOKEN_COLON_COLON)) {
pm_token_t delimiter = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+ node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
// If there is a [ or ( that follows, then this is part of a larger pattern
@@ -16908,7 +16533,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+ expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
}
closing = parser->previous;
@@ -16920,7 +16545,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
}
closing = parser->previous;
@@ -16929,7 +16554,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
if (!inner) {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
- return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
+ return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
}
// Now that we have the inner pattern, check to see if it's an array, find,
@@ -16940,15 +16565,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
case PM_ARRAY_PATTERN_NODE: {
pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = node->location.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(pattern_node, node);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
pattern_node->constant = node;
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -16956,15 +16581,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
case PM_FIND_PATTERN_NODE: {
pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = node->location.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(pattern_node, node);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
pattern_node->constant = node;
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -16972,15 +16597,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
case PM_HASH_PATTERN_NODE: {
pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
- if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = node->location.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_NODE(pattern_node, node);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
pattern_node->constant = node;
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -16993,8 +16618,8 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures
// attaching its constant. In this case we'll create an array pattern and
// attach our constant to it.
pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
- pm_array_pattern_node_requireds_append(pattern_node, inner);
- return (pm_node_t *) pattern_node;
+ pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
+ return UP(pattern_node);
}
/**
@@ -17010,21 +16635,20 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
// will check for that here. If they do, then we'll add it to the local
// table since this pattern will cause it to become a local variable.
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
- pm_token_t identifier = parser->previous;
- pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
+ pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
int depth;
if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
- pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
+ pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
- name = (pm_node_t *) pm_local_variable_target_node_create(
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+ name = UP(pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&identifier),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
- );
+ ));
}
// Finally we can return the created node.
@@ -17043,7 +16667,7 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
pm_node_t *value = NULL;
if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
- return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
+ return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
}
if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
@@ -17054,16 +16678,16 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures)
pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
- value = (pm_node_t *) pm_local_variable_target_node_create(
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+ value = UP(pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
- );
+ ));
}
- return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
+ return UP(pm_assoc_splat_node_create(parser, value, &operator));
}
/**
@@ -17100,22 +16724,24 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u
static pm_node_t *
parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
+ const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
+ const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
- pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
+ pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
int depth = -1;
- if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
+ if (pm_slice_is_valid_local(parser, start, end)) {
depth = pm_parser_local_depth_constant_id(parser, constant_id);
} else {
- pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
+ pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
- if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
- PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
+ if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
+ PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
}
}
if (depth == -1) {
- pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
+ pm_parser_local_add(parser, constant_id, start, end, 0);
}
parse_pattern_capture(parser, captures, constant_id, value_loc);
@@ -17126,7 +16752,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
(uint32_t) (depth == -1 ? 0 : depth)
);
- return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
+ return UP(pm_implicit_node_create(parser, UP(target)));
}
/**
@@ -17135,7 +16761,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca
*/
static void
parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
- if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
+ if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
}
}
@@ -17154,25 +16780,31 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
case PM_NO_KEYWORDS_PARAMETER_NODE:
rest = first_node;
break;
+ case PM_INTERPOLATED_SYMBOL_NODE:
case PM_SYMBOL_NODE: {
- if (pm_symbol_node_label_p(first_node)) {
- parse_pattern_hash_key(parser, &keys, first_node);
+ if (pm_symbol_node_label_p(parser, first_node)) {
+ if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
+ } else {
+ parse_pattern_hash_key(parser, &keys, first_node);
+ }
+
pm_node_t *value;
if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
- // Otherwise, we will create an implicit local variable
- // target for the value.
- value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+ if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) {
+ value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
+ } else {
+ value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0));
+ }
} else {
// Here we have a value for the first assoc in the list, so
// we will parse it now.
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
}
- pm_token_t operator = not_provided(parser);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
-
- pm_node_list_append(&assocs, assoc);
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
+ pm_node_list_append(parser->arena, &assocs, assoc);
break;
}
}
@@ -17184,11 +16816,10 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
pm_parser_err_node(parser, first_node, diag_id);
- pm_token_t operator = not_provided(parser);
- pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
+ pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
- pm_node_list_append(&assocs, assoc);
+ pm_node_list_append(parser->arena, &assocs, assoc);
break;
}
}
@@ -17212,7 +16843,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
rest = assoc;
} else {
pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
- pm_node_list_append(&assocs, assoc);
+ pm_node_list_append(parser->arena, &assocs, assoc);
}
} else {
pm_node_t *key;
@@ -17222,36 +16853,43 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
- } else if (!pm_symbol_node_label_p(key)) {
+ } else if (!pm_symbol_node_label_p(parser, key)) {
pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
}
+ } else if (accept1(parser, PM_TOKEN_LABEL)) {
+ key = UP(pm_symbol_node_label_create(parser, &parser->previous));
} else {
expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
- key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+
+ pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
+ key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
}
parse_pattern_hash_key(parser, &keys, key);
pm_node_t *value = NULL;
if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+ if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
+ value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
+ } else {
+ value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0));
+ }
} else {
value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
}
- pm_token_t operator = not_provided(parser);
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
if (rest != NULL) {
pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
}
- pm_node_list_append(&assocs, assoc);
+ pm_node_list_append(parser->arena, &assocs, assoc);
}
}
pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
- xfree(assocs.nodes);
+ // assocs.nodes is arena-allocated; no explicit free needed.
pm_static_literals_free(&keys);
return node;
@@ -17273,13 +16911,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
- return (pm_node_t *) pm_local_variable_target_node_create(
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
+ return UP(pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
- );
+ ));
}
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
pm_token_t opening = parser->current;
@@ -17288,7 +16926,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
// If we have an empty array pattern, then we'll just return a new
// array pattern node.
- return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
+ return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
}
// Otherwise, we'll parse the inner pattern, then deal with it depending
@@ -17296,34 +16934,34 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
+ expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
pm_token_t closing = parser->previous;
switch (PM_NODE_TYPE(inner)) {
case PM_ARRAY_PATTERN_NODE: {
pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
- if (pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = opening.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
}
case PM_FIND_PATTERN_NODE: {
pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
- if (pattern_node->opening_loc.start == NULL) {
- pattern_node->base.location.start = opening.start;
- pattern_node->base.location.end = closing.end;
+ if (pattern_node->opening_loc.length == 0) {
+ PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
- pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ pattern_node->opening_loc = TOK2LOC(parser, &opening);
+ pattern_node->closing_loc = TOK2LOC(parser, &closing);
- return (pm_node_t *) pattern_node;
+ return UP(pattern_node);
}
break;
@@ -17333,8 +16971,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
}
pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
- pm_array_pattern_node_requireds_append(node, inner);
- return (pm_node_t *) node;
+ pm_array_pattern_node_requireds_append(parser->arena, node, inner);
+ return UP(node);
}
case PM_TOKEN_BRACE_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17354,19 +16992,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
switch (parser->current.type) {
case PM_TOKEN_LABEL:
parser_lex(parser);
- first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
+ first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
break;
case PM_TOKEN_USTAR_STAR:
first_node = parse_pattern_keyword_rest(parser, captures);
break;
case PM_TOKEN_STRING_BEGIN:
- first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
+ first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
break;
default: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type));
parser_lex(parser);
- first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
break;
}
}
@@ -17374,18 +17012,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
pm_token_t closing = parser->previous;
- node->base.location.start = opening.start;
- node->base.location.end = closing.end;
+ PM_NODE_START_SET_TOKEN(parser, node, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
- node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
+ node->opening_loc = TOK2LOC(parser, &opening);
+ node->closing_loc = TOK2LOC(parser, &closing);
}
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UDOT_DOT:
case PM_TOKEN_UDOT_DOT_DOT: {
@@ -17396,21 +17034,27 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
// expression as the right side of the range.
switch (parser->current.type) {
case PM_CASE_PRIMITIVE: {
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
default: {
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
- pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
}
}
case PM_CASE_PRIMITIVE: {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
// If we found a label, we need to immediately return to the caller.
- if (pm_symbol_node_label_p(node)) return node;
+ if (pm_symbol_node_label_p(parser, node)) return node;
+
+ // Call nodes (e.g. arithmetic operations) are not allowed in patterns.
+ if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
+ pm_parser_err_node(parser, node, diag_id);
+ return UP(pm_error_recovery_node_create_unexpected(parser, node));
+ }
// Now that we have a primitive, we need to check if it's part of a range.
if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
@@ -17421,11 +17065,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
// node. Otherwise, we'll create an endless range.
switch (parser->current.type) {
case PM_CASE_PRIMITIVE: {
- pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
+ pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
+ return UP(pm_range_node_create(parser, node, &operator, right));
}
default:
- return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
+ return UP(pm_range_node_create(parser, node, &operator, NULL));
}
}
@@ -17440,44 +17084,44 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) parse_variable(parser);
+ pm_node_t *variable = UP(parse_variable(parser));
if (variable == NULL) {
- PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
- variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
+ variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
}
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
- pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
case PM_TOKEN_PARENTHESIS_LEFT: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -17486,19 +17130,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
pm_token_t lparen = parser->current;
parser_lex(parser);
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
- return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
+ return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
}
default: {
// If we get here, then we have a pin operator followed by something
- // not understood. We'll create a missing node and return that.
+ // not understood. We'll create an error recovery node and return that.
pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
- pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
- return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
+ pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
+ return UP(pm_pinned_variable_node_create(parser, &operator, variable));
}
}
}
@@ -17509,31 +17153,56 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
- return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
+ return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
}
case PM_TOKEN_CONSTANT: {
pm_token_t constant = parser->current;
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
+ pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
}
default:
pm_parser_err_current(parser, diag_id);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
+ }
+}
+
+static bool
+parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_LOCAL_VARIABLE_TARGET_NODE: {
+ pm_parser_t *parser = (pm_parser_t *) data;
+ pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
+ return false;
+ }
+ default:
+ return true;
}
}
/**
+ * When we get here, we know that we already have a syntax error, because we
+ * know we have captured a variable and that we are in an alternation.
+ */
+static void
+parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
+ pm_visit_node(node, parse_pattern_alternation_error_each, parser);
+}
+
+/**
* Parse any number of primitives joined by alternation and ended optionally by
* assignment.
*/
static pm_node_t *
parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
pm_node_t *node = first_node;
+ bool alternation = false;
- while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
- pm_token_t operator = parser->previous;
+ while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
+ if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
+ parse_pattern_alternation_error(parser, node);
+ }
switch (parser->current.type) {
case PM_TOKEN_IDENTIFIER:
@@ -17545,41 +17214,47 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
case PM_TOKEN_UDOT_DOT:
case PM_TOKEN_UDOT_DOT_DOT:
case PM_CASE_PRIMITIVE: {
- if (node == NULL) {
+ if (!alternation) {
node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
} else {
+ pm_token_t operator = parser->previous;
pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
}
break;
}
case PM_TOKEN_PARENTHESIS_LEFT:
case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
+ pm_token_t operator = parser->previous;
pm_token_t opening = parser->current;
parser_lex(parser);
pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
- pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0);
+ expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
+ pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
- if (node == NULL) {
+ if (!alternation) {
node = right;
} else {
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
}
break;
}
default: {
pm_parser_err_current(parser, diag_id);
- pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
- if (node == NULL) {
+ if (!alternation) {
node = right;
} else {
- node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
+ if (captures->size) parse_pattern_alternation_error(parser, right);
+ node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
}
break;
@@ -17600,15 +17275,15 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p
pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
}
- parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
+ parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
parser,
- &PM_LOCATION_TOKEN_VALUE(&parser->previous),
+ &TOK2LOC(parser, &parser->previous),
constant_id,
(uint32_t) (depth == -1 ? 0 : depth)
);
- node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
+ node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
}
return node;
@@ -17627,8 +17302,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
switch (parser->current.type) {
case PM_TOKEN_LABEL: {
parser_lex(parser);
- pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
- node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
+ pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
+ node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
if (!(flags & PM_PARSE_PATTERN_TOP)) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17638,7 +17313,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
}
case PM_TOKEN_USTAR_STAR: {
node = parse_pattern_keyword_rest(parser, captures);
- node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+ node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
if (!(flags & PM_PARSE_PATTERN_TOP)) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17651,8 +17326,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// be dynamic symbols leading to hash patterns.
node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
- if (pm_symbol_node_label_p(node)) {
- node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+ if (pm_symbol_node_label_p(parser, node)) {
+ node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
if (!(flags & PM_PARSE_PATTERN_TOP)) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
@@ -17667,7 +17342,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
case PM_TOKEN_USTAR: {
if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
parser_lex(parser);
- node = (pm_node_t *) parse_pattern_rest(parser, captures);
+ node = UP(parse_pattern_rest(parser, captures));
leading_rest = true;
break;
}
@@ -17680,8 +17355,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// If we got a dynamic label symbol, then we need to treat it like the
// beginning of a hash pattern.
- if (pm_symbol_node_label_p(node)) {
- return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
+ if (pm_symbol_node_label_p(parser, node)) {
+ return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
}
if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
@@ -17689,20 +17364,20 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// or a find pattern. We need to parse all of the patterns, put them
// into a big list, and then determine which type of node we have.
pm_node_list_t nodes = { 0 };
- pm_node_list_append(&nodes, node);
+ pm_node_list_append(parser->arena, &nodes, node);
// Gather up all of the patterns into the list.
while (accept1(parser, PM_TOKEN_COMMA)) {
// Break early here in case we have a trailing comma.
- if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
- node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
- pm_node_list_append(&nodes, node);
+ if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
+ node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
+ pm_node_list_append(parser->arena, &nodes, node);
trailing_rest = true;
break;
}
if (accept1(parser, PM_TOKEN_USTAR)) {
- node = (pm_node_t *) parse_pattern_rest(parser, captures);
+ node = UP(parse_pattern_rest(parser, captures));
// If we have already parsed a splat pattern, then this is an
// error. We will continue to parse the rest of the patterns,
@@ -17716,7 +17391,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
}
- pm_node_list_append(&nodes, node);
+ pm_node_list_append(parser->arena, &nodes, node);
}
// If the first pattern and the last pattern are rest patterns, then we
@@ -17724,24 +17399,24 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
// are in between because we know we already added the appropriate
// errors. Otherwise we will create an array pattern.
if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
- node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
+ node = UP(pm_find_pattern_node_create(parser, &nodes));
if (nodes.size == 2) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
}
} else {
- node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
+ node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
if (leading_rest && trailing_rest) {
pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
}
}
- xfree(nodes.nodes);
+ // nodes.nodes is arena-allocated; no explicit free needed.
} else if (leading_rest) {
// Otherwise, if we parsed a single splat pattern, then we know we have
// an array pattern, so we can go ahead and create that node.
- node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
+ node = UP(pm_array_pattern_node_rest_create(parser, node));
}
return node;
@@ -17752,29 +17427,33 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
* from its start bounds. If it's a compound node, then we will recursively
* apply this function to its value.
*/
-static inline void
+static PRISM_INLINE void
parse_negative_numeric(pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE: {
pm_integer_node_t *cast = (pm_integer_node_t *) node;
cast->base.location.start--;
+ cast->base.location.length++;
cast->value.negative = true;
break;
}
case PM_FLOAT_NODE: {
pm_float_node_t *cast = (pm_float_node_t *) node;
cast->base.location.start--;
+ cast->base.location.length++;
cast->value = -cast->value;
break;
}
case PM_RATIONAL_NODE: {
pm_rational_node_t *cast = (pm_rational_node_t *) node;
cast->base.location.start--;
+ cast->base.location.length++;
cast->numerator.negative = true;
break;
}
case PM_IMAGINARY_NODE:
node->location.start--;
+ node->location.length++;
parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
break;
default:
@@ -17792,22 +17471,22 @@ static void
pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
switch (diag_id) {
case PM_ERR_HASH_KEY: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type));
break;
}
case PM_ERR_HASH_VALUE:
case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
break;
}
case PM_ERR_UNARY_RECEIVER: {
- const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
+ const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
break;
}
case PM_ERR_UNARY_DISALLOWED:
case PM_ERR_EXPECT_ARGUMENT: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
break;
}
default:
@@ -17887,6 +17566,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) {
case PM_CONTEXT_BEGIN:
case PM_CONTEXT_BLOCK_BRACES:
case PM_CONTEXT_BLOCK_KEYWORDS:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_CASE_IN:
case PM_CONTEXT_CASE_WHEN:
case PM_CONTEXT_DEFAULT_PARAMS:
@@ -17967,6 +17647,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
case PM_CONTEXT_BLOCK_KEYWORDS:
case PM_CONTEXT_BLOCK_ELSE:
case PM_CONTEXT_BLOCK_ENSURE:
+ case PM_CONTEXT_BLOCK_PARAMETERS:
case PM_CONTEXT_BLOCK_RESCUE:
case PM_CONTEXT_CASE_IN:
case PM_CONTEXT_CASE_WHEN:
@@ -18003,67 +17684,1383 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) {
}
/**
- * This struct is used to pass information between the regular expression parser
- * and the error callback.
+ * Determine if a given call node looks like a "command", which means it has
+ * arguments but does not have parentheses.
+ *
+ * All three of the following must hold for this to return true: the call has
+ * no opening delimiter (opening_loc has zero length), its block is either
+ * absent or a PM_BLOCK_ARGUMENT_NODE, and at least one of arguments or block
+ * is present.
*/
-typedef struct {
- /** The parser that we are parsing the regular expression for. */
- pm_parser_t *parser;
+static PRISM_INLINE bool
+pm_call_node_command_p(const pm_call_node_t *node) {
+ return (
+ (node->opening_loc.length == 0) &&
+ (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
+ (node->arguments != NULL || node->block != NULL)
+ );
+}
- /** The start of the regular expression. */
- const uint8_t *start;
+/**
+ * Returns true if the given node is a command-style call (a method call without
+ * parentheses that has arguments), excluding operator calls (e.g., a + b) which
+ * satisfy the same structural criteria but are not commands.
+ *
+ * Besides call nodes, this also inspects: super and yield nodes (true when
+ * they have no left parenthesis and carry arguments, or for super a block),
+ * rescue modifier nodes (decided by their expression), and def nodes that have
+ * a non-empty equal_loc and a body (decided by the body, or by the last
+ * statement of the body when it is a statements node). Every other node type
+ * yields false.
+ */
+static bool
+pm_command_call_value_p(const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_CALL_NODE: {
+ const pm_call_node_t *call = (const pm_call_node_t *) node;
- /** The end of the regular expression. */
- const uint8_t *end;
+ // Command-style calls (e.g., foo bar, obj.foo bar).
+ // Attribute writes (e.g., a.b = 1) are not commands.
+ if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
+ return true;
+ }
- /**
- * Whether or not the source of the regular expression is shared. This
- * impacts the location of error messages, because if it is shared then we
- * can use the location directly and if it is not, then we use the bounds of
- * the regular expression itself.
- */
- bool shared;
-} parse_regular_expression_error_data_t;
+ // A `!` or `not` prefix wrapping a command call (e.g.,
+ // `!foo bar`, `not foo bar`) is also a command-call value.
+ if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
+ return pm_command_call_value_p(call->receiver);
+ }
+
+ return false;
+ }
+ case PM_SUPER_NODE: {
+ const pm_super_node_t *cast = (const pm_super_node_t *) node;
+ return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL);
+ }
+ case PM_YIELD_NODE: {
+ const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
+ return cast->lparen_loc.length == 0 && cast->arguments != NULL;
+ }
+ case PM_RESCUE_MODIFIER_NODE:
+ return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression);
+ case PM_DEF_NODE: {
+ const pm_def_node_t *cast = (const pm_def_node_t *) node;
+ if (cast->equal_loc.length > 0 && cast->body != NULL) {
+ const pm_node_t *body = cast->body;
+ if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) {
+ body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1]; // NOTE(review): assumes the statements list is non-empty - confirm
+ }
+ return pm_command_call_value_p(body);
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
/**
- * This callback is called when the regular expression parser encounters a
- * syntax error.
+ * Returns true if the given node is a block call: a command
+ * with a do-block, or any call chained (via `.`, `::`, `&.`) from such a node.
+ * Block calls can only be followed by call chaining, composition (and/or), and
+ * modifier operators.
+ *
+ * The check is iterative: starting at the given node it follows receivers
+ * for as long as each node is a call node with a call operator. It returns
+ * false as soon as a call with a non-empty opening_loc is seen, when a call has
+ * neither the root shape nor a receiver reached through a call operator, or
+ * when a node that is not a call node is reached.
*/
-static void
-parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
- parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data;
- pm_location_t location;
+static bool
+pm_block_call_p(const pm_node_t *node) {
+ while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
+ const pm_call_node_t *call = (const pm_call_node_t *) node;
+ if (call->opening_loc.length > 0) return false;
+
+ // Root: command with do-block (e.g., `foo bar do end`).
+ if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
+ return true;
+ }
- if (callback_data->shared) {
- location = (pm_location_t) { .start = start, .end = end };
+ // Walk up the receiver chain (e.g., `foo bar do end.baz`).
+ if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
+ node = call->receiver;
+ continue;
+ }
+
+ return false;
+ }
+
+ return false;
+}
+
+/**
+ * Parse a case expression (the `case` keyword). This handles both case-when and
+ * case-in (pattern matching) forms.
+ *
+ * The `case` keyword itself is consumed here. An optional predicate is parsed
+ * first. If the next token is `when`, a case node with when clauses is built;
+ * otherwise a case-match node with in clauses is built. An optional `else`
+ * clause and the closing `end` are then handled for either form.
+ *
+ * Errors are added to the parser when no conditions were parsed
+ * (PM_ERR_CASE_MISSING_CONDITIONS) and when a case-match node has no predicate
+ * (PM_ERR_CASE_MATCH_MISSING_PREDICATE).
+ *
+ * Of the incoming flags, only PM_PARSE_ACCEPTS_DO_BLOCK is forwarded to the
+ * nested expression parsers, and depth + 1 is passed to every nested parse. A
+ * fresh block exits list is pushed on entry and popped before each return. The
+ * resulting case or case-match node is returned as a pm_node_t pointer.
+ */
+static pm_node_t *
+parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+ size_t opening_newline_index = token_newline_index(parser);
+ parser_lex(parser);
+
+ pm_token_t case_keyword = parser->previous;
+ pm_node_t *predicate = NULL;
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+ predicate = NULL;
+ } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
+ predicate = NULL;
+ } else if (!token_begins_expression_p(parser->current.type)) {
+ predicate = NULL;
+ } else {
+ predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
+ while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+ }
+
+ if (match1(parser, PM_TOKEN_KEYWORD_END)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+ parser_lex(parser);
+ pop_block_exits(parser, previous_block_exits);
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+ return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
+ }
+
+ /* At this point we can create a case node, though we don't yet know if it
+ * is a case-in or case-when node. */
+ pm_node_t *node;
+
+ if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+ pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
+ pm_static_literals_t literals = { 0 };
+
+ /* At this point we've seen a when keyword, so we know this is a
+ * case-when node. We will continue to parse the when nodes until we hit
+ * the end of the list. */
+ while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+ parser_lex(parser);
+
+ pm_token_t when_keyword = parser->previous;
+ pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
+
+ do {
+ if (accept1(parser, PM_TOKEN_USTAR)) {
+ pm_token_t operator = parser->previous;
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+
+ pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
+ pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
+
+ if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break;
+ } else {
+ pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
+ pm_when_node_conditions_append(parser->arena, when_node, condition);
+
+ /* If we found an error recovery node, then this is a syntax
+ * error and we should stop looping. */
+ if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
+
+ /* If this is a string node, then we need to mark it as
+ * frozen because when clause strings are frozen. */
+ if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
+ pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+ } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+ pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ pm_when_clause_static_literals_add(parser, &literals, condition);
+ }
+ } while (accept1(parser, PM_TOKEN_COMMA));
+
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+ pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+ }
+ } else {
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+ pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+ }
+
+ if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
+ if (statements != NULL) {
+ pm_when_node_statements_set(when_node, statements);
+ }
+ }
+
+ pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
+ }
+
+ /* If we didn't parse any conditions (in or when) then we need to
+ * indicate that we have an error. */
+ if (case_node->conditions.size == 0) {
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+ }
+
+ pm_static_literals_free(&literals);
+ node = UP(case_node);
+ } else {
+ pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
+
+ /* If this is a case-match node (i.e., it is a pattern matching case
+ * statement) then we must have a predicate. */
+ if (predicate == NULL) {
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
+ }
+
+ /* At this point we expect that we're parsing a case-in node. We will
+ * continue to parse the in nodes until we hit the end of the list. */
+ while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+
+ bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+ parser->pattern_matching_newlines = true;
+
+ lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
+ parser->command_start = false;
+ parser_lex(parser);
+
+ pm_token_t in_keyword = parser->previous;
+
+ pm_constant_id_list_t captures = { 0 };
+ pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
+ parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+
+ /* Since we're in the top-level of the case-in node we need to
+ * check for guard clauses in the form of `if` or `unless`
+ * statements. */
+ if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
+ pm_token_t keyword = parser->previous;
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+ pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
+ } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
+ pm_token_t keyword = parser->previous;
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+ pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
+ }
+
+ /* Now we need to check for the terminator of the in node's pattern.
+ * It can be a newline or semicolon optionally followed by a `then`
+ * keyword. */
+ pm_token_t then_keyword = { 0 };
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+ then_keyword = parser->previous;
+ }
+ } else {
+ expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
+ then_keyword = parser->previous;
+ }
+
+ /* Now we can actually parse the statements associated with the in
+ * node. */
+ pm_statements_node_t *statements;
+ if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ statements = NULL;
+ } else {
+ statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
+ }
+
+ /* Now that we have the full pattern and statements, we can create
+ * the node and attach it to the case node. */
+ pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
+ pm_case_match_node_condition_append(parser->arena, case_node, condition);
+ }
+
+ /* If we didn't parse any conditions (in or when) then we need to
+ * indicate that we have an error. */
+ if (case_node->conditions.size == 0) {
+ pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+ }
+
+ node = UP(case_node);
+ }
+
+ accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+ pm_token_t else_keyword = parser->previous;
+ pm_else_node_t *else_node;
+
+ if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+ else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
+ } else {
+ else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
+ }
+
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+ pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
+ } else {
+ pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
+ }
+ }
+
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
+
+ if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+ pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
} else {
- location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
+ pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
}
- PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
+ pop_block_exits(parser, previous_block_exits);
+ return node;
}
/**
- * Parse the errors for the regular expression and add them to the parser.
+ * Parse a class definition expression (the `class` keyword). This handles both
+ * regular class definitions and singleton class definitions (`class << expr`).
+ *
+ * The `class` keyword itself is consumed here. When it is followed by `<<`,
+ * the singleton form is parsed and a singleton class node is returned.
+ * Otherwise the constant path, an optional `< superclass`, and the body are
+ * parsed and a class node is returned. In both forms a new scope is pushed for
+ * the body and popped after `end`, and a trailing rescue/ensure is parsed as an
+ * implicit begin.
+ *
+ * For the regular form, PM_ERR_CLASS_NAME is reported when the name token is
+ * not a constant or the parsed path is neither a constant path nor a constant
+ * read, and PM_ERR_CLASS_IN_METHOD is reported when context_def_p is true.
+ *
+ * Of the incoming flags, only PM_PARSE_ACCEPTS_DO_BLOCK is forwarded to the
+ * nested expression parsers, and depth + 1 is passed to every nested parse.
+ * Note that the singleton path finishes with flush_block_exits while the
+ * regular path finishes with pop_block_exits.
*/
-static void
-parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
- const pm_string_t *unescaped = &node->unescaped;
- parse_regular_expression_error_data_t error_data = {
- .parser = parser,
- .start = node->base.location.start,
- .end = node->base.location.end,
- .shared = unescaped->type == PM_STRING_SHARED
- };
+static pm_node_t *
+parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+ size_t opening_newline_index = token_newline_index(parser);
+ parser_lex(parser);
+
+ pm_token_t class_keyword = parser->previous;
+ pm_do_loop_stack_push(parser, false);
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ if (accept1(parser, PM_TOKEN_LESS_LESS)) {
+ pm_token_t operator = parser->previous;
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
+
+ pm_parser_scope_push(parser, true);
+ if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
+ }
+
+ pm_node_t *statements = NULL;
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+ }
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+ pm_parser_scope_pop(parser);
+ pm_do_loop_stack_pop(parser);
+
+ flush_block_exits(parser, previous_block_exits);
+ return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
+ }
+
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
+ pm_token_t name = parser->previous;
+ if (name.type != PM_TOKEN_CONSTANT) {
+ pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
+ }
+
+ pm_token_t inheritance_operator = { 0 };
+ pm_node_t *superclass;
+
+ if (match1(parser, PM_TOKEN_LESS)) {
+ inheritance_operator = parser->current;
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+
+ parser->command_start = true;
+ parser_lex(parser);
+
+ superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
+ } else {
+ superclass = NULL;
+ }
+
+ pm_parser_scope_push(parser, true);
+
+ if (inheritance_operator.start != NULL) {
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
+ } else {
+ accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ }
+ pm_node_t *statements = NULL;
+
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+ }
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+ if (context_def_p(parser)) {
+ pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
+ }
- pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+ pm_parser_scope_pop(parser);
+ pm_do_loop_stack_pop(parser);
+
+ if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
+ pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
+ if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
+ constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+ }
+ }
+
+ pop_block_exits(parser, previous_block_exits);
+ return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
+}
+
+/**
+ * Parse a method definition expression (the `def` keyword).
+ */
+static pm_node_t *
+parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ pm_token_t def_keyword = parser->current;
+ size_t opening_newline_index = token_newline_index(parser);
+
+ pm_node_t *receiver = NULL;
+ pm_token_t operator = { 0 };
+ pm_token_t name;
+
+ /* This context is necessary for lexing `...` in bare params correctly. It
+ * must be pushed before lexing the first param, so it is pushed here. */
+ context_push(parser, PM_CONTEXT_DEF_PARAMS);
+ parser_lex(parser);
+
+ /* This will be false if the method name is not a valid identifier but could
+ * be followed by an operator. */
+ bool valid_name = true;
+
+ switch (parser->current.type) {
+ case PM_CASE_OPERATOR:
+ pm_parser_scope_push(parser, true);
+ lex_state_set(parser, PM_LEX_STATE_ENDFN);
+ parser_lex(parser);
+
+ name = parser->previous;
+ break;
+ case PM_TOKEN_IDENTIFIER: {
+ parser_lex(parser);
+
+ if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+ receiver = parse_variable_call(parser);
+
+ pm_parser_scope_push(parser, true);
+ lex_state_set(parser, PM_LEX_STATE_FNAME);
+ parser_lex(parser);
+
+ operator = parser->previous;
+ name = parse_method_definition_name(parser);
+ } else {
+ pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
+ pm_parser_scope_push(parser, true);
+
+ name = parser->previous;
+ }
+
+ break;
+ }
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ case PM_TOKEN_CLASS_VARIABLE:
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ valid_name = false;
+ PRISM_FALLTHROUGH
+ case PM_TOKEN_CONSTANT:
+ case PM_TOKEN_KEYWORD_NIL:
+ case PM_TOKEN_KEYWORD_SELF:
+ case PM_TOKEN_KEYWORD_TRUE:
+ case PM_TOKEN_KEYWORD_FALSE:
+ case PM_TOKEN_KEYWORD___FILE__:
+ case PM_TOKEN_KEYWORD___LINE__:
+ case PM_TOKEN_KEYWORD___ENCODING__: {
+ pm_parser_scope_push(parser, true);
+ parser_lex(parser);
+
+ pm_token_t identifier = parser->previous;
+
+ if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+ lex_state_set(parser, PM_LEX_STATE_FNAME);
+ parser_lex(parser);
+ operator = parser->previous;
+
+ switch (identifier.type) {
+ case PM_TOKEN_CONSTANT:
+ receiver = UP(pm_constant_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_CLASS_VARIABLE:
+ receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_NIL:
+ receiver = UP(pm_nil_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_SELF:
+ receiver = UP(pm_self_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_TRUE:
+ receiver = UP(pm_true_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD_FALSE:
+ receiver = UP(pm_false_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD___FILE__:
+ receiver = UP(pm_source_file_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD___LINE__:
+ receiver = UP(pm_source_line_node_create(parser, &identifier));
+ break;
+ case PM_TOKEN_KEYWORD___ENCODING__:
+ receiver = UP(pm_source_encoding_node_create(parser, &identifier));
+ break;
+ default:
+ break;
+ }
+
+ name = parse_method_definition_name(parser);
+ } else {
+ if (!valid_name) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
+ }
+
+ name = identifier;
+ }
+ break;
+ }
+ case PM_TOKEN_PARENTHESIS_LEFT: {
+ /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
+ * expression of this parenthesis should not be processed under this
+ * context. Thus, the context is popped here. */
+ context_pop(parser);
+ parser_lex(parser);
+
+ pm_token_t lparen = parser->previous;
+ pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
+
+ accept1(parser, PM_TOKEN_NEWLINE);
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+ pm_token_t rparen = parser->previous;
+
+ lex_state_set(parser, PM_LEX_STATE_FNAME);
+ expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
+
+ operator = parser->previous;
+ receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
+
+ /* `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason as
+ * described above. */
+ pm_parser_scope_push(parser, true);
+ context_push(parser, PM_CONTEXT_DEF_PARAMS);
+ name = parse_method_definition_name(parser);
+ break;
+ }
+ default:
+ pm_parser_scope_push(parser, true);
+ name = parse_method_definition_name(parser);
+ break;
+ }
+
+ pm_token_t lparen = { 0 };
+ pm_token_t rparen = { 0 };
+ pm_parameters_node_t *params;
+
+ bool accept_endless_def = true;
+ switch (parser->current.type) {
+ case PM_TOKEN_PARENTHESIS_LEFT: {
+ parser_lex(parser);
+ lparen = parser->previous;
+
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ params = NULL;
+ } else {
+ /* https://bugs.ruby-lang.org/issues/19107 */
+ bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
+ params = parse_parameters(
+ parser,
+ PM_BINDING_POWER_DEFINED,
+ true,
+ allow_trailing_comma,
+ true,
+ true,
+ false,
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+ (uint16_t) (depth + 1)
+ );
+ }
+
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->command_start = true;
+
+ context_pop(parser);
+ if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = 0;
+ }
+
+ rparen = parser->previous;
+ break;
+ }
+ case PM_CASE_PARAMETER: {
+ /* If we're about to lex a label, we need to add the label state to
+ * make sure the next newline is ignored. */
+ if (parser->current.type == PM_TOKEN_LABEL) {
+ lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
+ }
+
+ params = parse_parameters(
+ parser,
+ PM_BINDING_POWER_DEFINED,
+ false,
+ false,
+ true,
+ true,
+ false,
+ PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+ (uint16_t) (depth + 1)
+ );
+
+ /* Reject `def * = 1` and similar. We have to specifically check for
+ * them because they create ambiguity with optional arguments. */
+ accept_endless_def = false;
+
+ context_pop(parser);
+ break;
+ }
+ default: {
+ params = NULL;
+ context_pop(parser);
+ break;
+ }
+ }
+
+ pm_node_t *statements = NULL;
+ pm_token_t equal = { 0 };
+ pm_token_t end_keyword = { 0 };
+
+ if (accept1(parser, PM_TOKEN_EQUAL)) {
+ if (token_is_setter_name(&name)) {
+ pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
+ }
+ if (!accept_endless_def) {
+ pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
+ }
+ if (
+ parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
+ parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
+ ) {
+ PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
+ }
+ equal = parser->previous;
+
+ context_push(parser, PM_CONTEXT_DEF);
+ pm_do_loop_stack_push(parser, false);
+ statements = UP(pm_statements_node_create(parser));
+
+ uint8_t allow_flags;
+ if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+ allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
+ } else {
+ /* Allow `def foo = puts "Hello"` but not
+ * `private def foo = puts "Hello"` */
+ allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
+ }
+
+ /* Inside a def body, we push true onto the accepts_block_stack so that
+ * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
+ * for primary-level constructs, not commands). During command argument
+ * parsing, the stack is pushed to false, causing `do` to be lexed as
+ * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
+ * def body and instead left for the outer context. */
+ pm_accepts_block_stack_push(parser, true);
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
+ pm_accepts_block_stack_pop(parser);
+
+ /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
+ * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
+ * intentionally not caught here — it should bubble up to the outer
+ * context (e.g., `private def f = puts "Hello" do end` where the block
+ * attaches to `private`). */
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
+ pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
+ pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
+ }
+
+ if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+ context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
+ pm_token_t rescue_keyword = parser->previous;
+
+ /* In the Ruby grammar, the rescue value of an endless method
+ * command excludes and/or and in/=>. */
+ pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
+ }
+
+ /* A nested endless def whose body is a command call (e.g.,
+ * `def f = def g = foo bar`) is a command assignment and cannot appear
+ * as a def body. */
+ if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
+ PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ }
+
+ pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
+ pm_do_loop_stack_pop(parser);
+ context_pop(parser);
+ } else {
+ if (lparen.start == NULL) {
+ lex_state_set(parser, PM_LEX_STATE_BEG);
+ parser->command_start = true;
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
+ } else {
+ accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+ }
+
+ pm_accepts_block_stack_push(parser, true);
+ pm_do_loop_stack_push(parser, false);
+
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
+ }
+
+ pm_accepts_block_stack_pop(parser);
+ pm_do_loop_stack_pop(parser);
+
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
+ end_keyword = parser->previous;
+ }
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+ pm_parser_scope_pop(parser);
+
+ /* If the final character is `@` as is the case when defining methods to
+ * override the unary operators, we should ignore the @ in the same way we
+ * do for symbols. */
+ pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
+
+ flush_block_exits(parser, previous_block_exits);
+
+ return UP(pm_def_node_create(
+ parser,
+ name_id,
+ &name,
+ receiver,
+ params,
+ statements,
+ &locals,
+ &def_keyword,
+ NTOK2PTR(operator),
+ NTOK2PTR(lparen),
+ NTOK2PTR(rparen),
+ NTOK2PTR(equal),
+ NTOK2PTR(end_keyword)
+ ));
+}
+
+/**
+ * Parse a module definition expression (the `module` keyword).
+ *
+ * The function lexes past the current token (recorded as the `module`
+ * keyword), parses the expression naming the module, folds any following
+ * `::Constant` segments into a constant path, and then parses the body inside
+ * a newly pushed parser scope. The body may be followed by rescue/ensure/else
+ * clauses, which are handed to parse_rescues_implicit_begin. Finally the
+ * closing `end` keyword is expected.
+ *
+ * If the name parses to an error recovery node, a module node with no locals
+ * and no body is returned immediately, using a zero-width token located at the
+ * end of the previous token for both the name and the closing keyword.
+ *
+ * Diagnostics added here: PM_ERR_MODULE_NAME when the last name token is not a
+ * constant, and PM_ERR_MODULE_IN_METHOD when context_def_p reports that we are
+ * inside a method definition.
+ *
+ * @param parser The parser being advanced.
+ * @param flags Parse flags; only PM_PARSE_ACCEPTS_DO_BLOCK is forwarded when
+ *     parsing the module name.
+ * @param depth The current nesting depth; depth + 1 is passed to nested parse
+ *     calls.
+ * @return The newly created module node, upcast to a generic node.
+ */
+static pm_node_t *
+parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ size_t opening_newline_index = token_newline_index(parser);
+ parser_lex(parser);
+ pm_token_t module_keyword = parser->previous;
+
+ pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
+ pm_token_t name;
+
+ /* If we can recover from a syntax error that occurred while parsing the
+ * name of the module, then we'll handle that here. */
+ if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
+ pop_block_exits(parser, previous_block_exits);
+
+ pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+ return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
+ }
+
+ while (accept1(parser, PM_TOKEN_COLON_COLON)) {
+ pm_token_t double_colon = parser->previous;
+
+ expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+ constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
+ }
+
+ /* Here we retrieve the name of the module. If it wasn't a constant, then
+ * it's possible that `module foo` was passed, which is a syntax error. We
+ * handle that here as well. */
+ name = parser->previous;
+ if (name.type != PM_TOKEN_CONSTANT) {
+ pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
+ }
+
+ if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
+ constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+ }
+
+ pm_parser_scope_push(parser, true);
+ accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
+ pm_node_t *statements = NULL;
+
+ if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+ pm_accepts_block_stack_push(parser, true);
+ statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
+ pm_accepts_block_stack_pop(parser);
+ }
+
+ if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+ assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+ statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
+ } else {
+ parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
+ }
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+ pm_parser_scope_pop(parser);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
+
+ if (context_def_p(parser)) {
+ pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
+ }
+
+ pop_block_exits(parser, previous_block_exits);
+
+ return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
+}
+
+/**
+ * Parse an interpolated word array literal (`%W[...]`).
+ *
+ * The function lexes past the current token (recorded as the opening of the
+ * array) and then loops until it sees PM_TOKEN_STRING_END or PM_TOKEN_EOF,
+ * building one element at a time in a local "current" node:
+ *
+ * - PM_TOKEN_WORDS_SEP clears parser->explicit_encoding and, if an element is
+ *   in progress, appends it to the array and starts a new one.
+ * - PM_TOKEN_STRING_CONTENT becomes a string node. It either becomes the
+ *   current element, is appended to an existing interpolated string, or
+ *   causes the existing plain string to be wrapped in a new interpolated
+ *   string together with it.
+ * - PM_TOKEN_EMBVAR and PM_TOKEN_EMBEXPR_BEGIN ensure the current element is
+ *   an interpolated string and append the result of parse_string_part to it.
+ * - Any other token reports PM_ERR_LIST_W_UPPER_ELEMENT via expect1 and is
+ *   then lexed past.
+ *
+ * If the loop stops at end-of-file, PM_ERR_LIST_W_UPPER_TERM is reported on
+ * the opening token and a zero-width closing token is used instead.
+ *
+ * @param parser The parser being advanced.
+ * @param depth The current nesting depth; depth + 1 is passed to nested parse
+ *     calls.
+ * @return The array node, upcast to a generic node.
+ */
+static pm_node_t *
+parse_string_array(pm_parser_t *parser, uint16_t depth) {
+ parser_lex(parser);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+ /* This is the current node that we are parsing that will be added to the
+ * list of elements. */
+ pm_node_t *current = NULL;
+
+ while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ switch (parser->current.type) {
+ case PM_TOKEN_WORDS_SEP: {
+ /* Reset the explicit encoding if we hit a separator since each
+ * element can have its own encoding. */
+ parser->explicit_encoding = NULL;
+
+ if (current == NULL) {
+ /* If we hit a separator before we have any content, then we
+ * don't need to do anything. */
+ } else {
+ /* If we hit a separator after we've hit content, then we
+ * need to append that content to the list and reset the
+ * current node. */
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ }
+
+ parser_lex(parser);
+ break;
+ }
+ case PM_TOKEN_STRING_CONTENT: {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ pm_node_flag_set(string, parse_unescaped_encoding(parser));
+ parser_lex(parser);
+
+ if (current == NULL) {
+ /* If we hit content and the current node is NULL, then this
+ * is the first string content we've seen. In that case
+ * we're going to create a new string node and set that to
+ * the current. */
+ current = string;
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ /* If we hit string content and the current node is an
+ * interpolated string, then we need to append the string
+ * content to the list of child nodes. */
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ /* If we hit string content and the current node is a string
+ * node, then we need to convert the current node into an
+ * interpolated string and add the string content to the
+ * list of child nodes. */
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ pm_interpolated_string_node_append(parser, interpolated, string);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+
+ break;
+ }
+ case PM_TOKEN_EMBVAR: {
+ if (current == NULL) {
+ /* If we hit an embedded variable and the current node is
+ * NULL, then this is the start of a new string. We'll set
+ * the current node to a new interpolated string. */
+ current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ /* If we hit an embedded variable and the current node is a
+ * string node, then we'll convert the current into an
+ * interpolated string and add the string node to the list
+ * of parts. */
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ current = UP(interpolated);
+ } else {
+ /* If we hit an embedded variable and the current node is an
+ * interpolated string, then we'll just add the embedded
+ * variable. */
+ }
+
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+ break;
+ }
+ case PM_TOKEN_EMBEXPR_BEGIN: {
+ if (current == NULL) {
+ /* If we hit an embedded expression and the current node is
+ * NULL, then this is the start of a new string. We'll set
+ * the current node to a new interpolated string. */
+ current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ /* If we hit an embedded expression and the current node is
+ * a string node, then we'll convert the current into an
+ * interpolated string and add the string node to the list
+ * of parts. */
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ current = UP(interpolated);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ /* If we hit an embedded expression and the current node is
+ * an interpolated string, then we'll just continue on. */
+ } else {
+ assert(false && "unreachable");
+ }
+
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+ break;
+ }
+ default:
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
+ parser_lex(parser);
+ break;
+ }
+ }
+
+ /* If we have a current node, then we need to append it to the list. */
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ }
+
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
+ }
+
+ pm_array_node_close_set(parser, array, &closing);
+ return UP(array);
+}
+
+/**
+ * Parse an interpolated symbol array literal (`%I[...]`).
+ *
+ * The function lexes past the current token (recorded as the opening of the
+ * array) and then loops until it sees PM_TOKEN_STRING_END or PM_TOKEN_EOF,
+ * building one element at a time in a local "current" node:
+ *
+ * - PM_TOKEN_WORDS_SEP appends the element in progress (if any) to the array
+ *   and starts a new one.
+ * - PM_TOKEN_STRING_CONTENT either becomes a new symbol node, is appended as
+ *   a string part to an existing interpolated symbol, or causes the existing
+ *   plain symbol to be rewritten as an interpolated symbol with two string
+ *   parts.
+ * - PM_TOKEN_EMBVAR and PM_TOKEN_EMBEXPR_BEGIN ensure the current element is
+ *   an interpolated symbol and append the result of parse_string_part to it.
+ * - Any other token reports PM_ERR_LIST_I_UPPER_ELEMENT via expect1 and is
+ *   then lexed past.
+ *
+ * If the loop stops at end-of-file, PM_ERR_LIST_I_UPPER_TERM is reported on
+ * the opening token and a zero-width closing token is used instead.
+ *
+ * @param parser The parser being advanced.
+ * @param depth The current nesting depth; depth + 1 is passed to nested parse
+ *     calls.
+ * @return The array node, upcast to a generic node.
+ */
+static pm_node_t *
+parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
+ parser_lex(parser);
+ pm_token_t opening = parser->previous;
+ pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+ /* This is the current node that we are parsing that will be added to the
+ * list of elements. */
+ pm_node_t *current = NULL;
+
+ while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+ switch (parser->current.type) {
+ case PM_TOKEN_WORDS_SEP: {
+ if (current == NULL) {
+ /* If we hit a separator before we have any content, then we
+ * don't need to do anything. */
+ } else {
+ /* If we hit a separator after we've hit content, then we
+ * need to append that content to the list and reset the
+ * current node. */
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ }
+
+ parser_lex(parser);
+ break;
+ }
+ case PM_TOKEN_STRING_CONTENT: {
+ if (current == NULL) {
+ /* If we hit content and the current node is NULL, then this
+ * is the first string content we've seen. In that case
+ * we're going to create a new symbol node and set that to
+ * the current. */
+ current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+ /* If we hit string content and the current node is an
+ * interpolated symbol, then we need to append the string
+ * content to the list of child nodes. */
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+
+ pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ /* If we hit string content and the current node is a symbol
+ * node, then we need to convert the current node into an
+ * interpolated symbol and add the string content to the
+ * list of child nodes. */
+ pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+ pm_token_t content = {
+ .type = PM_TOKEN_STRING_CONTENT,
+ .start = parser->start + cast->value_loc.start,
+ .end = parser->start + cast->value_loc.start + cast->value_loc.length
+ };
+
+ pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+ /* The second part is the content token we are looking at right
+ * now, which has not been lexed past yet. Using the previous
+ * token here would give this part the location of the first
+ * part, so we use the current token as the other branches do. */
+ pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
+
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+
+ break;
+ }
+ case PM_TOKEN_EMBVAR: {
+ bool start_location_set = false;
+ if (current == NULL) {
+ /* If we hit an embedded variable and the current node is
+ * NULL, then this is the start of a new symbol. We'll set
+ * the current node to a new interpolated symbol. */
+ current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ /* If we hit an embedded variable and the current node is a
+ * symbol node, then we'll convert the current into an
+ * interpolated symbol and add the symbol's content (as a
+ * string node) to the list of parts. */
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+ current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+ PM_NODE_START_SET_NODE(interpolated, current);
+ start_location_set = true;
+ current = UP(interpolated);
+ } else {
+ /* If we hit an embedded variable and the current node is an
+ * interpolated symbol, then we'll just add the embedded
+ * variable. */
+ }
+
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+ pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+ if (!start_location_set) {
+ PM_NODE_START_SET_NODE(current, part);
+ }
+ break;
+ }
+ case PM_TOKEN_EMBEXPR_BEGIN: {
+ bool start_location_set = false;
+ if (current == NULL) {
+ /* If we hit an embedded expression and the current node is
+ * NULL, then this is the start of a new symbol. We'll set
+ * the current node to a new interpolated symbol. */
+ current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ /* If we hit an embedded expression and the current node is
+ * a symbol node, then we'll convert the current into an
+ * interpolated symbol and add the symbol's content (as a
+ * string node) to the list of parts. */
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+ current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+ PM_NODE_START_SET_NODE(interpolated, current);
+ start_location_set = true;
+ current = UP(interpolated);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+ /* If we hit an embedded expression and the current node is
+ * an interpolated symbol, then we'll just continue on. */
+ } else {
+ assert(false && "unreachable");
+ }
+
+ pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+ pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+ if (!start_location_set) {
+ PM_NODE_START_SET_NODE(current, part);
+ }
+ break;
+ }
+ default:
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
+ parser_lex(parser);
+ break;
+ }
+ }
+
+ /* If we have a current node, then we need to append it to the list. */
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ }
+
+ pm_token_t closing = parser->current;
+ if (match1(parser, PM_TOKEN_EOF)) {
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+ } else {
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+ }
+ pm_array_node_close_set(parser, array, &closing);
+
+ return UP(array);
+}
+
+/**
+ * Parse a parenthesized expression, which could be a grouping, a multi-target
+ * assignment, or a set of statements.
+ *
+ * The current token is recorded as the opening parenthesis and lexed past,
+ * and any leading semicolons and newlines are skipped. From there the function
+ * takes one of three paths:
+ *
+ * 1. If the next token is a right parenthesis or end-of-file, an empty
+ *    parentheses node is returned (expect1 reports PM_ERR_EXPECT_RPAREN if the
+ *    parenthesis is missing).
+ * 2. If exactly one statement is followed (after optional terminators) by a
+ *    right parenthesis, the result is either a multi-target node, when the
+ *    statement is a multi-target or splat node, or a parentheses node wrapping
+ *    a single-statement list. In the multi-target case
+ *    PM_ERR_WRITE_TARGET_UNEXPECTED is reported unless the surrounding context
+ *    or the following token permits a target there.
+ * 3. Otherwise further statements are parsed until a right parenthesis,
+ *    end-of-file, or an error recovery node is reached. A trailing multi-target
+ *    or splat statement is rewritten into a multi-write node with an error
+ *    recovery value and reported as PM_ERR_WRITE_TARGET_UNEXPECTED.
+ *
+ * A semicolon seen anywhere between statements sets
+ * PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS on the resulting node, as does
+ * taking the third path.
+ *
+ * @param parser The parser being advanced.
+ * @param binding_power The binding power of the enclosing expression; it is
+ *     compared against PM_BINDING_POWER_STATEMENT to decide whether a
+ *     multi-target result is permitted and whether a following comma
+ *     continues into parse_targets.
+ * @param depth The current nesting depth; depth + 1 is passed to nested parse
+ *     calls.
+ * @return A parentheses node or a multi-target node (or whatever parse_targets
+ *     returns), as a generic node.
+ */
+static pm_node_t *
+parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
+ pm_token_t opening = parser->current;
+ pm_node_flags_t paren_flags = 0;
+
+ pm_node_list_t current_block_exits = { 0 };
+ pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+ parser_lex(parser);
+ while (true) {
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ break;
+ }
+ }
+
+ /* If this is the end of the file or we match a right parenthesis, then we
+ * have an empty parentheses node, and we can immediately return. */
+ if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+ pop_block_exits(parser, previous_block_exits);
+ return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
+ }
+
+ /* Otherwise, we're going to parse the first statement in the list of
+ * statements within the parentheses. */
+ pm_accepts_block_stack_push(parser, true);
+ context_push(parser, PM_CONTEXT_PARENS);
+ pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ context_pop(parser);
+
+ /* Determine if this statement is followed by a terminator. In the case of a
+ * single statement, this is fine. But in the case of multiple statements
+ * it's required. */
+ bool terminator_found = false;
+
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ terminator_found = true;
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
+ terminator_found = true;
+ }
+
+ if (terminator_found) {
+ while (true) {
+ if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+ break;
+ }
+ }
+ }
+
+ /* If we hit a right parenthesis, then we're done parsing the parentheses
+ * node, and we can check which kind of node we should return. */
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
+ lex_state_set(parser, PM_LEX_STATE_ENDARG);
+ }
+
+ parser_lex(parser);
+ pm_accepts_block_stack_pop(parser);
+ pop_block_exits(parser, previous_block_exits);
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+ /* If we have a single statement and are ending on a right
+ * parenthesis, then we need to check if this is possibly a multiple
+ * target node. */
+ pm_multi_target_node_t *multi_target;
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
+ multi_target = (pm_multi_target_node_t *) statement;
+ } else {
+ multi_target = pm_multi_target_node_create(parser);
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
+ }
+
+ multi_target->lparen_loc = TOK2LOC(parser, &opening);
+ multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
+ PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
+ PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
+
+ pm_node_t *result;
+ if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
+ result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ accept1(parser, PM_TOKEN_NEWLINE);
+ } else {
+ result = UP(multi_target);
+ }
+
+ if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+ /* All set, this is explicitly allowed by the parent context. */
+ } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
+ /* All set, we're inside a for loop and we're parsing multiple
+ * targets. */
+ } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
+ /* Multi targets are not allowed when it's not a statement
+ * level. */
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ /* Multi targets must be followed by an equal sign in order to
+ * be valid (or a right parenthesis if they are nested). */
+ pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+
+ return result;
+ }
+
+ /* If we have a single statement and are ending on a right parenthesis
+ * and we didn't return a multiple assignment node, then we can return a
+ * regular parentheses node now. */
+ pm_statements_node_t *statements = pm_statements_node_create(parser);
+ pm_statements_node_body_append(parser, statements, statement, true);
+
+ return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
+ }
+
+ /* If we have more than one statement in the set of parentheses, then we are
+ * going to parse all of them as a list of statements. We'll do that here.
+ */
+ context_push(parser, PM_CONTEXT_PARENS);
+ paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+
+ pm_statements_node_t *statements = pm_statements_node_create(parser);
+ pm_statements_node_body_append(parser, statements, statement, true);
+
+ /* If we didn't find a terminator and we didn't find a right parenthesis,
+ * then this is a syntax error. */
+ if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ }
+
+ /* Parse each statement within the parentheses. */
+ while (true) {
+ pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+ pm_statements_node_body_append(parser, statements, node, true);
+
+ /* If we're recovering from a syntax error, then we need to stop parsing
+ * the statements now. */
+ if (parser->recovering) {
+ /* If this is the level of context where the recovery has happened,
+ * then we can mark the parser as done recovering. */
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
+ break;
+ }
+
+ /* If we couldn't parse an expression at all, then we need to bail out
+ * of the loop. */
+ if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
+
+ /* If we successfully parsed a statement, then we are going to need a
+ * terminator to delimit them. */
+ if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+ while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+ if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
+ } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ break;
+ } else if (!match1(parser, PM_TOKEN_EOF)) {
+ /* If we're at the end of the file, then we're going to add an error
+ * after this for the ) anyway. */
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ }
+ }
+
+ context_pop(parser);
+ pm_accepts_block_stack_pop(parser);
+ expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+ /* When we're parsing multi targets, we allow them to be followed by a right
+ * parenthesis if they are at the statement level. This is only possible if
+ * they are the final statement in a parentheses. We need to explicitly
+ * reject that here. */
+ {
+ pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
+
+ if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+ pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+ pm_multi_target_node_targets_append(parser, multi_target, statement);
+
+ statement = UP(multi_target);
+ statements->body.nodes[statements->body.size - 1] = statement;
+ }
+
+ if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
+ const uint8_t *offset = parser->start + PM_NODE_END(statement);
+ pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
+ pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
+
+ statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
+ statements->body.nodes[statements->body.size - 1] = statement;
+
+ pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
+ }
+ }
+
+ pop_block_exits(parser, previous_block_exits);
+ pm_void_statements_check(parser, statements, true);
+ return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
}
/**
* Parse an expression that begins with the previous node that we just lexed.
*/
-static inline pm_node_t *
-parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+static PRISM_INLINE pm_node_t *
+parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
switch (parser->current.type) {
case PM_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser);
@@ -18092,11 +19089,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
} else {
// If there was no comma, then we need to add a syntax
// error.
- const uint8_t *location = parser->previous.end;
- PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
-
- parser->previous.start = location;
- parser->previous.type = PM_TOKEN_MISSING;
+ PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type));
+ parser->previous.start = parser->previous.end;
+ parser->previous.type = 0;
}
}
@@ -18114,28 +19109,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
pm_parser_scope_forwarding_positionals_check(parser, &operator);
} else {
- expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
+ element = UP(pm_splat_node_create(parser, &operator, expression));
} else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
if (parsed_bare_hash) {
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
}
- element = (pm_node_t *) pm_keyword_hash_node_create(parser);
+ element = UP(pm_keyword_hash_node_create(parser));
pm_static_literals_t hash_keys = { 0 };
- if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
+ if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
}
pm_static_literals_free(&hash_keys);
parsed_bare_hash = true;
} else {
- element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
+ element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
- if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
+ if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
if (parsed_bare_hash) {
pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
}
@@ -18144,18 +19139,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_static_literals_t hash_keys = { 0 };
pm_hash_key_static_literals_add(parser, &hash_keys, element);
- pm_token_t operator;
+ pm_token_t operator = { 0 };
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
operator = parser->previous;
- } else {
- operator = not_provided(parser);
}
- pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
- pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
- pm_keyword_hash_node_elements_append(hash, assoc);
+ pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
+ pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
- element = (pm_node_t *) hash;
+ element = UP(hash);
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
}
@@ -18165,236 +19158,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
}
- pm_array_node_elements_append(array, element);
- if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+ pm_array_node_elements_append(parser->arena, array, element);
+ if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
}
accept1(parser, PM_TOKEN_NEWLINE);
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
+ parser->previous.type = 0;
}
- pm_array_node_close_set(array, &parser->previous);
+ pm_array_node_close_set(parser, array, &parser->previous);
pm_accepts_block_stack_pop(parser);
- return (pm_node_t *) array;
+ return UP(array);
}
case PM_TOKEN_PARENTHESIS_LEFT:
- case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
- pm_token_t opening = parser->current;
- pm_node_flags_t flags = 0;
-
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- parser_lex(parser);
- while (true) {
- if (accept1(parser, PM_TOKEN_SEMICOLON)) {
- flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
- break;
- }
- }
-
- // If this is the end of the file or we match a right parenthesis, then
- // we have an empty parentheses node, and we can immediately return.
- if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, flags);
- }
-
- // Otherwise, we're going to parse the first statement in the list
- // of statements within the parentheses.
- pm_accepts_block_stack_push(parser, true);
- context_push(parser, PM_CONTEXT_PARENS);
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
- context_pop(parser);
-
- // Determine if this statement is followed by a terminator. In the
- // case of a single statement, this is fine. But in the case of
- // multiple statements it's required.
- bool terminator_found = false;
-
- if (accept1(parser, PM_TOKEN_SEMICOLON)) {
- terminator_found = true;
- flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
- } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
- terminator_found = true;
- }
-
- if (terminator_found) {
- while (true) {
- if (accept1(parser, PM_TOKEN_SEMICOLON)) {
- flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
- } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
- break;
- }
- }
- }
-
- // If we hit a right parenthesis, then we're done parsing the
- // parentheses node, and we can check which kind of node we should
- // return.
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
- lex_state_set(parser, PM_LEX_STATE_ENDARG);
- }
-
- parser_lex(parser);
- pm_accepts_block_stack_pop(parser);
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
- // If we have a single statement and are ending on a right
- // parenthesis, then we need to check if this is possibly a
- // multiple target node.
- pm_multi_target_node_t *multi_target;
-
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
- multi_target = (pm_multi_target_node_t *) statement;
- } else {
- multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, statement);
- }
-
- pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
-
- multi_target->lparen_loc = lparen_loc;
- multi_target->rparen_loc = rparen_loc;
- multi_target->base.location.start = lparen_loc.start;
- multi_target->base.location.end = rparen_loc.end;
-
- pm_node_t *result;
- if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
- result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
- accept1(parser, PM_TOKEN_NEWLINE);
- } else {
- result = (pm_node_t *) multi_target;
- }
-
- if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
- // All set, this is explicitly allowed by the parent
- // context.
- } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
- // All set, we're inside a for loop and we're parsing
- // multiple targets.
- } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
- // Multi targets are not allowed when it's not a
- // statement level.
- pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
- } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
- // Multi targets must be followed by an equal sign in
- // order to be valid (or a right parenthesis if they are
- // nested).
- pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
- }
-
- return result;
- }
-
- // If we have a single statement and are ending on a right parenthesis
- // and we didn't return a multiple assignment node, then we can return a
- // regular parentheses node now.
- pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(parser, statements, statement, true);
-
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
- }
-
- // If we have more than one statement in the set of parentheses,
- // then we are going to parse all of them as a list of statements.
- // We'll do that here.
- context_push(parser, PM_CONTEXT_PARENS);
- flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-
- pm_statements_node_t *statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(parser, statements, statement, true);
-
- // If we didn't find a terminator and we didn't find a right
- // parenthesis, then this is a syntax error.
- if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
- }
-
- // Parse each statement within the parentheses.
- while (true) {
- pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
- pm_statements_node_body_append(parser, statements, node, true);
-
- // If we're recovering from a syntax error, then we need to stop
- // parsing the statements now.
- if (parser->recovering) {
- // If this is the level of context where the recovery has
- // happened, then we can mark the parser as done recovering.
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
- break;
- }
-
- // If we couldn't parse an expression at all, then we need to
- // bail out of the loop.
- if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
-
- // If we successfully parsed a statement, then we are going to
- // need terminator to delimit them.
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
- } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- break;
- } else if (!match1(parser, PM_TOKEN_EOF)) {
- // If we're at the end of the file, then we're going to add
- // an error after this for the ) anyway.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
- }
- }
-
- context_pop(parser);
- pm_accepts_block_stack_pop(parser);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
- // When we're parsing multi targets, we allow them to be followed by
- // a right parenthesis if they are at the statement level. This is
- // only possible if they are the final statement in a parentheses.
- // We need to explicitly reject that here.
- {
- pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
-
- if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
- pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
- pm_multi_target_node_targets_append(parser, multi_target, statement);
-
- statement = (pm_node_t *) multi_target;
- statements->body.nodes[statements->body.size - 1] = statement;
- }
-
- if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
- const uint8_t *offset = statement->location.end;
- pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
- pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
-
- statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
- statements->body.nodes[statements->body.size - 1] = statement;
-
- pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
- }
- }
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- pm_void_statements_check(parser, statements, true);
- return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous, flags);
- }
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+ return parse_parentheses(parser, binding_power, depth);
case PM_TOKEN_BRACE_LEFT: {
// If we were passed a current_hash_keys via the parser, then that
// means we're already parsing a hash and we want to share the set
@@ -18409,14 +19192,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_accepts_block_stack_push(parser, true);
parser_lex(parser);
- pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
+ pm_token_t opening = parser->previous;
+ pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
if (current_hash_keys != NULL) {
- parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
+ parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
} else {
pm_static_literals_t hash_keys = { 0 };
- parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
+ parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
pm_static_literals_free(&hash_keys);
}
@@ -18424,26 +19208,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_accepts_block_stack_pop(parser);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
- pm_hash_node_closing_loc_set(node, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
+ pm_hash_node_closing_loc_set(parser, node, &parser->previous);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_CHARACTER_LITERAL: {
- parser_lex(parser);
-
- pm_token_t opening = parser->previous;
- opening.type = PM_TOKEN_STRING_BEGIN;
- opening.end = opening.start + 1;
-
- pm_token_t content = parser->previous;
- content.type = PM_TOKEN_STRING_CONTENT;
- content.start = content.start + 1;
+ pm_node_t *node = UP(pm_string_node_create_current_string(
+ parser,
+ &(pm_token_t) {
+ .type = PM_TOKEN_STRING_BEGIN,
+ .start = parser->current.start,
+ .end = parser->current.start + 1
+ },
+ &(pm_token_t) {
+ .type = PM_TOKEN_STRING_CONTENT,
+ .start = parser->current.start + 1,
+ .end = parser->current.end
+ },
+ NULL
+ ));
- pm_token_t closing = not_provided(parser);
- pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
pm_node_flag_set(node, parse_unescaped_encoding(parser));
+ // Skip past the character literal here, since now we have handled
+ // parser->explicit_encoding correctly.
+ parser_lex(parser);
+
// Characters can be followed by strings in which case they are
// automatically concatenated.
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
@@ -18454,7 +19245,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18470,16 +19261,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// fact a method call, not a constant read.
if (
match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
- (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+ ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
match1(parser, PM_TOKEN_BRACE_LEFT)
) {
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
+ return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
}
- pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
// If we get here, then we have a comma immediately following a
@@ -18494,7 +19285,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t delimiter = parser->previous;
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
+ pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18507,7 +19298,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t operator = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// Unary .. and ... are special because these are non-associative
// operators that can also be unary operators. In this case we need
@@ -18517,23 +19308,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
}
- return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
+ return UP(pm_range_node_create(parser, NULL, &operator, right));
}
case PM_TOKEN_FLOAT:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
+ return UP(pm_float_node_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_IMAGINARY:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
+ return UP(pm_float_node_imaginary_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_RATIONAL:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
+ return UP(pm_float_node_rational_create(parser, &parser->previous));
case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
parser_lex(parser);
- return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
+ return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
case PM_TOKEN_NUMBERED_REFERENCE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18543,7 +19334,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_GLOBAL_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18553,7 +19344,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_BACK_REFERENCE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18575,26 +19366,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_call_node_t *call = (pm_call_node_t *) node;
pm_arguments_t arguments = { 0 };
- if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
+ if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
// Since we found arguments, we need to turn off the
// variable call bit in the flags.
- pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
+ pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
call->opening_loc = arguments.opening_loc;
call->arguments = arguments.arguments;
call->closing_loc = arguments.closing_loc;
call->block = arguments.block;
- if (arguments.block != NULL) {
- call->base.location.end = arguments.block->location.end;
- } else if (arguments.closing_loc.start == NULL) {
- if (arguments.arguments != NULL) {
- call->base.location.end = arguments.arguments->base.location.end;
- } else {
- call->base.location.end = call->message_loc.end;
- }
+ const pm_location_t *end = pm_arguments_end(&arguments);
+ if (end == NULL) {
+ PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
} else {
- call->base.location.end = arguments.closing_loc.end;
+ PM_NODE_LENGTH_SET_LOCATION(call, end);
}
}
} else {
@@ -18602,19 +19388,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// can still be a method call if it is followed by arguments or
// a block, so we need to check for that here.
if (
- (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
+ ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
(pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
match1(parser, PM_TOKEN_BRACE_LEFT)
) {
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
// If we're about to convert an 'it' implicit local
// variable read into a method call, we need to remove
// it from the list of implicit local variables.
- parse_target_implicit_parameter(parser, node);
+ pm_node_unreference(parser, node);
} else {
// Otherwise, we're about to convert a regular local
// variable read into a method call, in which case we
@@ -18622,16 +19408,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// purposes of warnings.
assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
- if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
+ pm_node_unreference(parser, node);
} else {
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
}
}
- pm_node_destroy(parser, node);
- return (pm_node_t *) fcall;
+ return UP(fcall);
}
}
@@ -18663,12 +19448,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t content = parse_strings_empty_content(parser->previous.start);
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
- node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+ node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
} else {
- node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
+ node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
}
- node->location.end = opening.end;
+ PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
} else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
// If we get here, then we tried to find something in the
// heredoc but couldn't actually parse anything, so we'll just
@@ -18676,7 +19461,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
//
// parse_string_part handles its own errors, so there is no need
// for us to add one here.
- node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
} else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
// If we get here, then the part that we parsed was plain string
// content and we're at the end of the heredoc, so we can return
@@ -18685,8 +19470,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_flag_set(part, parse_unescaped_encoding(parser));
pm_string_node_t *cast = (pm_string_node_t *) part;
- cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
- cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
+ cast->opening_loc = TOK2LOC(parser, &opening);
+ cast->closing_loc = TOK2LOC(parser, &parser->current);
cast->base.location = cast->opening_loc;
if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
@@ -18695,21 +19480,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
- parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
+ parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
}
- node = (pm_node_t *) cast;
+ node = UP(cast);
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
} else {
// If we get here, then we have multiple parts in the heredoc,
// so we'll need to create an interpolated string node to hold
// them all.
pm_node_list_t parts = { 0 };
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_node_list_append(&parts, part);
+ pm_node_list_append(parser->arena, &parts, part);
}
}
@@ -18720,19 +19505,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
cast->parts = parts;
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
- pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
+ pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
cast->base.location = cast->opening_loc;
- node = (pm_node_t *) cast;
+ node = UP(cast);
} else {
pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
- pm_node_list_free(&parts);
expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
- pm_interpolated_string_node_closing_set(cast, &parser->previous);
+ pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
cast->base.location = cast->opening_loc;
- node = (pm_node_t *) cast;
+ node = UP(cast);
}
// If this is a heredoc that is indented with a ~, then we need
@@ -18757,7 +19541,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
case PM_TOKEN_INSTANCE_VARIABLE: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -18766,34 +19550,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return node;
}
case PM_TOKEN_INTEGER: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_IMAGINARY: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_RATIONAL: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
}
case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
- pm_node_flags_t base = parser->integer_base;
+ pm_node_flags_t base = parser->integer.base;
parser_lex(parser);
- return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
+ return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
}
case PM_TOKEN_KEYWORD___ENCODING__:
parser_lex(parser);
- return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
+ return UP(pm_source_encoding_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD___FILE__:
parser_lex(parser);
- return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
+ return UP(pm_source_file_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD___LINE__:
parser_lex(parser);
- return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
+ return UP(pm_source_line_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_ALIAS: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
@@ -18813,245 +19597,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
}
- } else {
+ } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+ old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
}
- return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
+ return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
}
case PM_SYMBOL_NODE:
case PM_INTERPOLATED_SYMBOL_NODE: {
- if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
+ if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
+ old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
}
}
PRISM_FALLTHROUGH
default:
- return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
- }
- }
- case PM_TOKEN_KEYWORD_CASE: {
- size_t opening_newline_index = token_newline_index(parser);
- parser_lex(parser);
-
- pm_token_t case_keyword = parser->previous;
- pm_node_t *predicate = NULL;
-
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- predicate = NULL;
- } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
- predicate = NULL;
- } else if (!token_begins_expression_p(parser->current.type)) {
- predicate = NULL;
- } else {
- predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
- while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
- }
-
- if (match1(parser, PM_TOKEN_KEYWORD_END)) {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
- parser_lex(parser);
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
- }
-
- // At this point we can create a case node, though we don't yet know
- // if it is a case-in or case-when node.
- pm_token_t end_keyword = not_provided(parser);
- pm_node_t *node;
-
- if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
- pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
- pm_static_literals_t literals = { 0 };
-
- // At this point we've seen a when keyword, so we know this is a
- // case-when node. We will continue to parse the when nodes
- // until we hit the end of the list.
- while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
- parser_lex(parser);
-
- pm_token_t when_keyword = parser->previous;
- pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
-
- do {
- if (accept1(parser, PM_TOKEN_USTAR)) {
- pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
-
- pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
- pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
-
- if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
- } else {
- pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
- pm_when_node_conditions_append(when_node, condition);
-
- // If we found a missing node, then this is a syntax
- // error and we should stop looping.
- if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
-
- // If this is a string node, then we need to mark it
- // as frozen because when clause strings are frozen.
- if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
- pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
- } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
- pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
- }
-
- pm_when_clause_static_literals_add(parser, &literals, condition);
- }
- } while (accept1(parser, PM_TOKEN_COMMA));
-
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
- pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
- }
- } else {
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
- pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
- }
-
- if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
- if (statements != NULL) {
- pm_when_node_statements_set(when_node, statements);
- }
- }
-
- pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
- }
-
- // If we didn't parse any conditions (in or when) then we need
- // to indicate that we have an error.
- if (case_node->conditions.size == 0) {
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- }
-
- pm_static_literals_free(&literals);
- node = (pm_node_t *) case_node;
- } else {
- pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
-
- // If this is a case-match node (i.e., it is a pattern matching
- // case statement) then we must have a predicate.
- if (predicate == NULL) {
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
- }
-
- // At this point we expect that we're parsing a case-in node. We
- // will continue to parse the in nodes until we hit the end of
- // the list.
- while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
-
- bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
- parser->pattern_matching_newlines = true;
-
- lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
- parser->command_start = false;
- parser_lex(parser);
-
- pm_token_t in_keyword = parser->previous;
-
- pm_constant_id_list_t captures = { 0 };
- pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
-
- parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- pm_constant_id_list_free(&captures);
-
- // Since we're in the top-level of the case-in node we need
- // to check for guard clauses in the form of `if` or
- // `unless` statements.
- if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
- pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
- pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
- } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
- pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
- pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
- }
-
- // Now we need to check for the terminator of the in node's
- // pattern. It can be a newline or semicolon optionally
- // followed by a `then` keyword.
- pm_token_t then_keyword;
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
- then_keyword = parser->previous;
- } else {
- then_keyword = not_provided(parser);
- }
- } else {
- expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
- then_keyword = parser->previous;
- }
-
- // Now we can actually parse the statements associated with
- // the in node.
- pm_statements_node_t *statements;
- if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- statements = NULL;
- } else {
- statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
- }
-
- // Now that we have the full pattern and statements, we can
- // create the node and attach it to the case node.
- pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
- pm_case_match_node_condition_append(case_node, condition);
- }
-
- // If we didn't parse any conditions (in or when) then we need
- // to indicate that we have an error.
- if (case_node->conditions.size == 0) {
- pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
- }
-
- node = (pm_node_t *) case_node;
- }
-
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
- pm_token_t else_keyword = parser->previous;
- pm_else_node_t *else_node;
-
- if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
- else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
- } else {
- else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
- }
-
- if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
- pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
- } else {
- pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
- }
- }
-
- parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
-
- if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
- pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
- } else {
- pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
+ return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
}
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return node;
}
+ case PM_TOKEN_KEYWORD_CASE:
+ return parse_case(parser, flags, depth);
case PM_TOKEN_KEYWORD_BEGIN: {
size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
@@ -19072,15 +19638,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
-
- begin_node->base.location.end = parser->previous.end;
- pm_begin_node_end_keyword_set(begin_node, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
+ PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
+ pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) begin_node;
+ return UP(begin_node);
}
case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
pm_node_list_t current_block_exits = { 0 };
@@ -19097,16 +19660,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
pm_context_t context = parser->current_context->context;
if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
}
flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+ return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_BREAK:
case PM_TOKEN_KEYWORD_NEXT:
@@ -19123,29 +19684,44 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
- parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
+ pm_token_t next = parser->current;
+ parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
+
+ // Reject `foo && return bar`.
+ if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type));
+ }
+ }
+
+ // It's possible that we've parsed a block argument through our
+ // call to parse_arguments. If we found one, we should mark it
+ // as invalid and destroy it, as we don't have a place for it.
+ if (arguments.block != NULL) {
+ pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
+ pm_node_unreference(parser, arguments.block);
+ arguments.block = NULL;
}
}
switch (keyword.type) {
case PM_TOKEN_KEYWORD_BREAK: {
- pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
if (!parser->partial_script) parse_block_exit(parser, node);
return node;
}
case PM_TOKEN_KEYWORD_NEXT: {
- pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
if (!parser->partial_script) parse_block_exit(parser, node);
return node;
}
case PM_TOKEN_KEYWORD_RETURN: {
- pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
+ pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
parse_return(parser, node);
return node;
}
default:
assert(false && "unreachable");
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
}
}
case PM_TOKEN_KEYWORD_SUPER: {
@@ -19153,24 +19729,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t keyword = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
if (
- arguments.opening_loc.start == NULL &&
+ arguments.opening_loc.length == 0 &&
arguments.arguments == NULL &&
((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
) {
- return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
+ return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
}
- return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
+ return UP(pm_super_node_create(parser, &keyword, &arguments));
}
case PM_TOKEN_KEYWORD_YIELD: {
parser_lex(parser);
pm_token_t keyword = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
// It's possible that we've parsed a block argument through our
// call to parse_arguments_list. If we found one, we should mark it
@@ -19178,434 +19754,25 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// yield node.
if (arguments.block != NULL) {
pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
- pm_node_destroy(parser, arguments.block);
+ pm_node_unreference(parser, arguments.block);
arguments.block = NULL;
}
- pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
+ pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
return node;
}
- case PM_TOKEN_KEYWORD_CLASS: {
- size_t opening_newline_index = token_newline_index(parser);
- parser_lex(parser);
-
- pm_token_t class_keyword = parser->previous;
- pm_do_loop_stack_push(parser, false);
-
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- if (accept1(parser, PM_TOKEN_LESS_LESS)) {
- pm_token_t operator = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
-
- pm_parser_scope_push(parser, true);
- if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
- }
-
- pm_node_t *statements = NULL;
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
- }
-
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
- pm_parser_scope_pop(parser);
- pm_do_loop_stack_pop(parser);
-
- flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
- }
-
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
- pm_token_t name = parser->previous;
- if (name.type != PM_TOKEN_CONSTANT) {
- pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
- }
-
- pm_token_t inheritance_operator;
- pm_node_t *superclass;
-
- if (match1(parser, PM_TOKEN_LESS)) {
- inheritance_operator = parser->current;
- lex_state_set(parser, PM_LEX_STATE_BEG);
-
- parser->command_start = true;
- parser_lex(parser);
-
- superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
- } else {
- inheritance_operator = not_provided(parser);
- superclass = NULL;
- }
-
- pm_parser_scope_push(parser, true);
-
- if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
- expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
- } else {
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- }
- pm_node_t *statements = NULL;
-
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
- }
-
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
-
- if (context_def_p(parser)) {
- pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
- }
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
- pm_parser_scope_pop(parser);
- pm_do_loop_stack_pop(parser);
-
- if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
- pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
- }
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
- }
- case PM_TOKEN_KEYWORD_DEF: {
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- pm_token_t def_keyword = parser->current;
- size_t opening_newline_index = token_newline_index(parser);
-
- pm_node_t *receiver = NULL;
- pm_token_t operator = not_provided(parser);
- pm_token_t name;
-
- // This context is necessary for lexing `...` in a bare params
- // correctly. It must be pushed before lexing the first param, so it
- // is here.
- context_push(parser, PM_CONTEXT_DEF_PARAMS);
- parser_lex(parser);
-
- // This will be false if the method name is not a valid identifier
- // but could be followed by an operator.
- bool valid_name = true;
-
- switch (parser->current.type) {
- case PM_CASE_OPERATOR:
- pm_parser_scope_push(parser, true);
- lex_state_set(parser, PM_LEX_STATE_ENDFN);
- parser_lex(parser);
-
- name = parser->previous;
- break;
- case PM_TOKEN_IDENTIFIER: {
- parser_lex(parser);
-
- if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
- receiver = parse_variable_call(parser);
-
- pm_parser_scope_push(parser, true);
- lex_state_set(parser, PM_LEX_STATE_FNAME);
- parser_lex(parser);
-
- operator = parser->previous;
- name = parse_method_definition_name(parser);
- } else {
- pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
- pm_parser_scope_push(parser, true);
-
- name = parser->previous;
- }
-
- break;
- }
- case PM_TOKEN_INSTANCE_VARIABLE:
- case PM_TOKEN_CLASS_VARIABLE:
- case PM_TOKEN_GLOBAL_VARIABLE:
- valid_name = false;
- PRISM_FALLTHROUGH
- case PM_TOKEN_CONSTANT:
- case PM_TOKEN_KEYWORD_NIL:
- case PM_TOKEN_KEYWORD_SELF:
- case PM_TOKEN_KEYWORD_TRUE:
- case PM_TOKEN_KEYWORD_FALSE:
- case PM_TOKEN_KEYWORD___FILE__:
- case PM_TOKEN_KEYWORD___LINE__:
- case PM_TOKEN_KEYWORD___ENCODING__: {
- pm_parser_scope_push(parser, true);
- parser_lex(parser);
-
- pm_token_t identifier = parser->previous;
-
- if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
- lex_state_set(parser, PM_LEX_STATE_FNAME);
- parser_lex(parser);
- operator = parser->previous;
-
- switch (identifier.type) {
- case PM_TOKEN_CONSTANT:
- receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_INSTANCE_VARIABLE:
- receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_CLASS_VARIABLE:
- receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_GLOBAL_VARIABLE:
- receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_NIL:
- receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_SELF:
- receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_TRUE:
- receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD_FALSE:
- receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD___FILE__:
- receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD___LINE__:
- receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
- break;
- case PM_TOKEN_KEYWORD___ENCODING__:
- receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
- break;
- default:
- break;
- }
-
- name = parse_method_definition_name(parser);
- } else {
- if (!valid_name) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
- }
-
- name = identifier;
- }
- break;
- }
- case PM_TOKEN_PARENTHESIS_LEFT: {
- // The current context is `PM_CONTEXT_DEF_PARAMS`, however
- // the inner expression of this parenthesis should not be
- // processed under this context. Thus, the context is popped
- // here.
- context_pop(parser);
- parser_lex(parser);
-
- pm_token_t lparen = parser->previous;
- pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
-
- accept1(parser, PM_TOKEN_NEWLINE);
- expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- pm_token_t rparen = parser->previous;
-
- lex_state_set(parser, PM_LEX_STATE_FNAME);
- expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
-
- operator = parser->previous;
- receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0);
-
- // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
- // reason as described the above.
- pm_parser_scope_push(parser, true);
- context_push(parser, PM_CONTEXT_DEF_PARAMS);
- name = parse_method_definition_name(parser);
- break;
- }
- default:
- pm_parser_scope_push(parser, true);
- name = parse_method_definition_name(parser);
- break;
- }
-
- pm_token_t lparen;
- pm_token_t rparen;
- pm_parameters_node_t *params;
-
- switch (parser->current.type) {
- case PM_TOKEN_PARENTHESIS_LEFT: {
- parser_lex(parser);
- lparen = parser->previous;
-
- if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- params = NULL;
- } else {
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
- }
-
- lex_state_set(parser, PM_LEX_STATE_BEG);
- parser->command_start = true;
-
- context_pop(parser);
- if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
- parser->previous.start = parser->previous.end;
- parser->previous.type = PM_TOKEN_MISSING;
- }
-
- rparen = parser->previous;
- break;
- }
- case PM_CASE_PARAMETER: {
- // If we're about to lex a label, we need to add the label
- // state to make sure the next newline is ignored.
- if (parser->current.type == PM_TOKEN_LABEL) {
- lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
- }
-
- lparen = not_provided(parser);
- rparen = not_provided(parser);
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
-
- context_pop(parser);
- break;
- }
- default: {
- lparen = not_provided(parser);
- rparen = not_provided(parser);
- params = NULL;
-
- context_pop(parser);
- break;
- }
- }
-
- pm_node_t *statements = NULL;
- pm_token_t equal;
- pm_token_t end_keyword;
-
- if (accept1(parser, PM_TOKEN_EQUAL)) {
- if (token_is_setter_name(&name)) {
- pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
- }
- equal = parser->previous;
-
- context_push(parser, PM_CONTEXT_DEF);
- pm_do_loop_stack_push(parser, false);
- statements = (pm_node_t *) pm_statements_node_create(parser);
-
- pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
-
- if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
- context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
-
- pm_token_t rescue_keyword = parser->previous;
- pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
- context_pop(parser);
-
- statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
- }
-
- pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
- pm_do_loop_stack_pop(parser);
- context_pop(parser);
- end_keyword = not_provided(parser);
- } else {
- equal = not_provided(parser);
-
- if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
- lex_state_set(parser, PM_LEX_STATE_BEG);
- parser->command_start = true;
- expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
- } else {
- accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
- }
-
- pm_accepts_block_stack_push(parser, true);
- pm_do_loop_stack_push(parser, false);
-
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
- }
-
- pm_accepts_block_stack_pop(parser);
- pm_do_loop_stack_pop(parser);
-
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
- end_keyword = parser->previous;
- }
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
- pm_parser_scope_pop(parser);
-
- /**
- * If the final character is @. As is the case when defining
- * methods to override the unary operators, we should ignore
- * the @ in the same way we do for symbols.
- */
- pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
-
- flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_def_node_create(
- parser,
- name_id,
- &name,
- receiver,
- params,
- statements,
- &locals,
- &def_keyword,
- &operator,
- &lparen,
- &rparen,
- &equal,
- &end_keyword
- );
- }
+ case PM_TOKEN_KEYWORD_CLASS:
+ return parse_class(parser, flags, depth);
+ case PM_TOKEN_KEYWORD_DEF:
+ return parse_def(parser, binding_power, flags, depth);
case PM_TOKEN_KEYWORD_DEFINED: {
parser_lex(parser);
- pm_token_t keyword = parser->previous;
- pm_token_t lparen;
- pm_token_t rparen;
+ pm_token_t keyword = parser->previous;
+ pm_token_t lparen = { 0 };
+ pm_token_t rparen = { 0 };
pm_node_t *expression;
context_push(parser, PM_CONTEXT_DEFINED);
@@ -19615,34 +19782,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
lparen = parser->previous;
if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- expression = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
- lparen = not_provided(parser);
- rparen = not_provided(parser);
+ expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
+ lparen = (pm_token_t) { 0 };
} else {
- expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+ expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
- if (parser->recovering) {
- rparen = not_provided(parser);
- } else {
+ if (!parser->recovering) {
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
rparen = parser->previous;
}
}
} else {
- lparen = not_provided(parser);
- rparen = not_provided(parser);
- expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
+ expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
}
context_pop(parser);
- return (pm_node_t *) pm_defined_node_create(
+ return UP(pm_defined_node_create(
parser,
- &lparen,
+ NTOK2PTR(lparen),
expression,
- &rparen,
- &PM_LOCATION_TOKEN_VALUE(&keyword)
- );
+ NTOK2PTR(rparen),
+ &keyword
+ ));
}
case PM_TOKEN_KEYWORD_END_UPCASE: {
if (binding_power != PM_BINDING_POWER_STATEMENT) {
@@ -19660,12 +19822,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_token_t opening = parser->previous;
pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
- return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
+ return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
}
case PM_TOKEN_KEYWORD_FALSE:
parser_lex(parser);
- return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
+ return UP(pm_false_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_FOR: {
size_t opening_newline_index = token_newline_index(parser);
parser_lex(parser);
@@ -19681,15 +19843,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
+ index = UP(pm_splat_node_create(parser, &star_operator, name));
} else if (token_begins_expression_p(parser->current.type)) {
- index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
+ index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
} else {
pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
- index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
+ index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
}
// Now, if there are multiple index expressions, parse them out.
@@ -19705,16 +19867,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
pm_token_t in_keyword = parser->previous;
- pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
+ pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
- pm_token_t do_keyword;
+ pm_token_t do_keyword = { 0 };
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
do_keyword = parser->previous;
} else {
- do_keyword = not_provided(parser);
if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type));
}
}
@@ -19724,13 +19885,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
- return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
+ return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
}
case PM_TOKEN_KEYWORD_IF:
if (parser_end_of_line_p(parser)) {
- PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
}
size_t opening_newline_index = token_newline_index(parser);
@@ -19747,26 +19908,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
- if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
- pm_node_destroy(parser, name);
+ if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
} else {
- pm_undef_node_append(undef, name);
+ pm_undef_node_append(parser->arena, undef, name);
while (match1(parser, PM_TOKEN_COMMA)) {
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
parser_lex(parser);
name = parse_undef_argument(parser, (uint16_t) (depth + 1));
- if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
- pm_node_destroy(parser, name);
+ if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
break;
}
- pm_undef_node_append(undef, name);
+ pm_undef_node_append(parser->arena, undef, name);
}
}
- return (pm_node_t *) undef;
+ return UP(undef);
}
case PM_TOKEN_KEYWORD_NOT: {
parser_lex(parser);
@@ -19775,28 +19934,46 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_arguments_t arguments = { 0 };
pm_node_t *receiver = NULL;
+ // The `not` keyword without parentheses is only valid in contexts
+ // where it would be parsed as an expression (i.e., at or below
+ // the `not` binding power level). In other contexts (e.g., method
+ // arguments, array elements, assignment right-hand sides),
+ // parentheses are required: `not(x)`. An exception is made for
+ // endless def bodies, where `not` is valid as both `arg` and
+ // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
+ if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
+ if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
+ pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
+ } else {
+ accept1(parser, PM_TOKEN_NEWLINE);
+ pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
+ }
+
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
+ }
+
accept1(parser, PM_TOKEN_NEWLINE);
if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
pm_token_t lparen = parser->previous;
if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
- receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0);
+ receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
} else {
- arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen);
- receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+ arguments.opening_loc = TOK2LOC(parser, &lparen);
+ receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
if (!parser->recovering) {
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments.closing_loc = TOK2LOC(parser, &parser->previous);
}
}
} else {
- receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
+ receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
+ return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
}
case PM_TOKEN_KEYWORD_UNLESS: {
size_t opening_newline_index = token_newline_index(parser);
@@ -19804,81 +19981,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
}
- case PM_TOKEN_KEYWORD_MODULE: {
- pm_node_list_t current_block_exits = { 0 };
- pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
- size_t opening_newline_index = token_newline_index(parser);
- parser_lex(parser);
- pm_token_t module_keyword = parser->previous;
-
- pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
- pm_token_t name;
-
- // If we can recover from a syntax error that occurred while parsing
- // the name of the module, then we'll handle that here.
- if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
- }
-
- while (accept1(parser, PM_TOKEN_COLON_COLON)) {
- pm_token_t double_colon = parser->previous;
-
- expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
- }
-
- // Here we retrieve the name of the module. If it wasn't a constant,
- // then it's possible that `module foo` was passed, which is a
- // syntax error. We handle that here as well.
- name = parser->previous;
- if (name.type != PM_TOKEN_CONSTANT) {
- pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
- }
-
- pm_parser_scope_push(parser, true);
- accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
- pm_node_t *statements = NULL;
-
- if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
- pm_accepts_block_stack_push(parser, true);
- statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
- }
-
- if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
- assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
- statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
- } else {
- parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
- }
-
- pm_constant_id_list_t locals;
- pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
- pm_parser_scope_pop(parser);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
-
- if (context_def_p(parser)) {
- pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
- }
-
- pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
- }
+ case PM_TOKEN_KEYWORD_MODULE:
+ return parse_module(parser, flags, depth);
case PM_TOKEN_KEYWORD_NIL:
parser_lex(parser);
- return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
+ return UP(pm_nil_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_REDO: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
if (!parser->partial_script) parse_block_exit(parser, node);
return node;
@@ -19886,17 +19997,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_TOKEN_KEYWORD_RETRY: {
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
+ pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
parse_retry(parser, node);
return node;
}
case PM_TOKEN_KEYWORD_SELF:
parser_lex(parser);
- return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
+ return UP(pm_self_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_TRUE:
parser_lex(parser);
- return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
+ return UP(pm_true_node_create(parser, &parser->previous));
case PM_TOKEN_KEYWORD_UNTIL: {
size_t opening_newline_index = token_newline_index(parser);
@@ -19905,16 +20016,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
context_pop(parser);
- pm_token_t do_keyword;
+ pm_token_t do_keyword = { 0 };
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
do_keyword = parser->previous;
} else {
- do_keyword = not_provided(parser);
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
}
@@ -19927,9 +20037,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
+ return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
}
case PM_TOKEN_KEYWORD_WHILE: {
size_t opening_newline_index = token_newline_index(parser);
@@ -19939,16 +20049,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t keyword = parser->previous;
- pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+ pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
pm_do_loop_stack_pop(parser);
context_pop(parser);
- pm_token_t do_keyword;
+ pm_token_t do_keyword = { 0 };
if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
do_keyword = parser->previous;
} else {
- do_keyword = not_provided(parser);
expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
}
@@ -19961,381 +20070,122 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
+ return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
}
case PM_TOKEN_PERCENT_LOWER_I: {
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
- }
-
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
- }
-
- pm_token_t closing = parser->current;
- if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
- }
- pm_array_node_close_set(array, &closing);
-
- return (pm_node_t *) array;
- }
- case PM_TOKEN_PERCENT_UPPER_I: {
- parser_lex(parser);
- pm_token_t opening = parser->previous;
- pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // This is the current node that we are parsing that will be added to the
- // list of elements.
- pm_node_t *current = NULL;
-
- while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- switch (parser->current.type) {
- case PM_TOKEN_WORDS_SEP: {
- if (current == NULL) {
- // If we hit a separator before we have any content, then we don't
- // need to do anything.
- } else {
- // If we hit a separator after we've hit content, then we need to
- // append that content to the list and reset the current node.
- pm_array_node_elements_append(array, current);
- current = NULL;
- }
-
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+ parser_lex(parser);
+ pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+ pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+ pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
+ pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+ pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
parser_lex(parser);
- break;
- }
- case PM_TOKEN_STRING_CONTENT: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- if (current == NULL) {
- // If we hit content and the current node is NULL, then this is
- // the first string content we've seen. In that case we're going
- // to create a new string node and set that to the current.
- current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
- parser_lex(parser);
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
- // If we hit string content and the current node is an
- // interpolated string, then we need to append the string content
- // to the list of child nodes.
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- parser_lex(parser);
-
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
- } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit string content and the current node is a symbol node,
- // then we need to convert the current node into an interpolated
- // string and add the string content to the list of child nodes.
- pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
- pm_token_t bounds = not_provided(parser);
-
- pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
- pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
- pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
- parser_lex(parser);
-
- pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_symbol_node_append(interpolated, first_string);
- pm_interpolated_symbol_node_append(interpolated, second_string);
-
- xfree(current);
- current = (pm_node_t *) interpolated;
- } else {
- assert(false && "unreachable");
- }
-
- break;
- }
- case PM_TOKEN_EMBVAR: {
- bool start_location_set = false;
- if (current == NULL) {
- // If we hit an embedded variable and the current node is NULL,
- // then this is the start of a new string. We'll set the current
- // node to a new interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit an embedded variable and the current node is a string
- // node, then we'll convert the current into an interpolated
- // string and add the string node to the list of parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-
- current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
- pm_interpolated_symbol_node_append(interpolated, current);
- interpolated->base.location.start = current->location.start;
- start_location_set = true;
- current = (pm_node_t *) interpolated;
- } else {
- // If we hit an embedded variable and the current node is an
- // interpolated string, then we'll just add the embedded variable.
- }
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
- if (!start_location_set) {
- current->location.start = part->location.start;
- }
- break;
- }
- case PM_TOKEN_EMBEXPR_BEGIN: {
- bool start_location_set = false;
- if (current == NULL) {
- // If we hit an embedded expression and the current node is NULL,
- // then this is the start of a new string. We'll set the current
- // node to a new interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
- // If we hit an embedded expression and the current node is a
- // string node, then we'll convert the current into an
- // interpolated string and add the string node to the list of
- // parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-
- current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
- pm_interpolated_symbol_node_append(interpolated, current);
- interpolated->base.location.start = current->location.start;
- start_location_set = true;
- current = (pm_node_t *) interpolated;
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
- // If we hit an embedded expression and the current node is an
- // interpolated string, then we'll just continue on.
- } else {
- assert(false && "unreachable");
- }
+ pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+ pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
- if (!start_location_set) {
- current->location.start = part->location.start;
- }
- break;
+ // current is arena-allocated so no explicit free is needed.
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
}
- default:
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
- parser_lex(parser);
- break;
}
- }
- // If we have a current node, then we need to append it to the list.
- if (current) {
- pm_array_node_elements_append(array, current);
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
}
- pm_array_node_close_set(array, &closing);
+ pm_array_node_close_set(parser, array, &closing);
- return (pm_node_t *) array;
+ return UP(array);
}
+ case PM_TOKEN_PERCENT_UPPER_I:
+ return parse_symbol_array(parser, depth);
case PM_TOKEN_PERCENT_LOWER_W: {
parser_lex(parser);
pm_token_t opening = parser->previous;
pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // skip all leading whitespaces
- accept1(parser, PM_TOKEN_WORDS_SEP);
+ pm_node_t *current = NULL;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
accept1(parser, PM_TOKEN_WORDS_SEP);
if (match1(parser, PM_TOKEN_STRING_END)) break;
- if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- pm_array_node_elements_append(array, string);
+ // Interpolation is not possible but nested heredocs can still lead to
+ // consecutive (disjoint) string tokens when the final newline is escaped.
+ while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+ pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+
+ // Record the string node, moving to interpolation if needed.
+ if (current == NULL) {
+ current = string;
+ } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+ pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+ } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+ pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+ pm_interpolated_string_node_append(parser, interpolated, current);
+ pm_interpolated_string_node_append(parser, interpolated, string);
+ current = UP(interpolated);
+ } else {
+ assert(false && "unreachable");
+ }
+ parser_lex(parser);
}
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ if (current) {
+ pm_array_node_elements_append(parser->arena, array, current);
+ current = NULL;
+ } else {
+ expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+ }
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
}
- pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
- }
- case PM_TOKEN_PERCENT_UPPER_W: {
- parser_lex(parser);
- pm_token_t opening = parser->previous;
- pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- // This is the current node that we are parsing that will be added
- // to the list of elements.
- pm_node_t *current = NULL;
-
- while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
- switch (parser->current.type) {
- case PM_TOKEN_WORDS_SEP: {
- // Reset the explicit encoding if we hit a separator
- // since each element can have its own encoding.
- parser->explicit_encoding = NULL;
-
- if (current == NULL) {
- // If we hit a separator before we have any content,
- // then we don't need to do anything.
- } else {
- // If we hit a separator after we've hit content,
- // then we need to append that content to the list
- // and reset the current node.
- pm_array_node_elements_append(array, current);
- current = NULL;
- }
-
- parser_lex(parser);
- break;
- }
- case PM_TOKEN_STRING_CONTENT: {
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
- pm_node_flag_set(string, parse_unescaped_encoding(parser));
- parser_lex(parser);
-
- if (current == NULL) {
- // If we hit content and the current node is NULL,
- // then this is the first string content we've seen.
- // In that case we're going to create a new string
- // node and set that to the current.
- current = string;
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
- // If we hit string content and the current node is
- // an interpolated string, then we need to append
- // the string content to the list of child nodes.
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
- } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
- // If we hit string content and the current node is
- // a string node, then we need to convert the
- // current node into an interpolated string and add
- // the string content to the list of child nodes.
- pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(interpolated, current);
- pm_interpolated_string_node_append(interpolated, string);
- current = (pm_node_t *) interpolated;
- } else {
- assert(false && "unreachable");
- }
-
- break;
- }
- case PM_TOKEN_EMBVAR: {
- if (current == NULL) {
- // If we hit an embedded variable and the current
- // node is NULL, then this is the start of a new
- // string. We'll set the current node to a new
- // interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
- // If we hit an embedded variable and the current
- // node is a string node, then we'll convert the
- // current into an interpolated string and add the
- // string node to the list of parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(interpolated, current);
- current = (pm_node_t *) interpolated;
- } else {
- // If we hit an embedded variable and the current
- // node is an interpolated string, then we'll just
- // add the embedded variable.
- }
-
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
- break;
- }
- case PM_TOKEN_EMBEXPR_BEGIN: {
- if (current == NULL) {
- // If we hit an embedded expression and the current
- // node is NULL, then this is the start of a new
- // string. We'll set the current node to a new
- // interpolated string.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
- // If we hit an embedded expression and the current
- // node is a string node, then we'll convert the
- // current into an interpolated string and add the
- // string node to the list of parts.
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
- pm_interpolated_string_node_append(interpolated, current);
- current = (pm_node_t *) interpolated;
- } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
- // If we hit an embedded expression and the current
- // node is an interpolated string, then we'll just
- // continue on.
- } else {
- assert(false && "unreachable");
- }
-
- pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
- pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
- break;
- }
- default:
- expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
- parser_lex(parser);
- break;
- }
- }
-
- // If we have a current node, then we need to append it to the list.
- if (current) {
- pm_array_node_elements_append(array, current);
- }
-
- pm_token_t closing = parser->current;
- if (match1(parser, PM_TOKEN_EOF)) {
- pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- } else {
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
- }
-
- pm_array_node_close_set(array, &closing);
- return (pm_node_t *) array;
+ pm_array_node_close_set(parser, array, &closing);
+ return UP(array);
}
+ case PM_TOKEN_PERCENT_UPPER_W:
+ return parse_string_array(parser, depth);
case PM_TOKEN_REGEXP_BEGIN: {
pm_token_t opening = parser->current;
parser_lex(parser);
@@ -20352,10 +20202,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
- pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
- pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
-
- return node;
+ pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
+ pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
+ return UP(node);
}
pm_interpolated_regular_expression_node_t *interpolated;
@@ -20367,7 +20216,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// regular expression) or if it's not then it has interpolation.
pm_string_t unescaped = parser->current_string;
pm_token_t content = parser->current;
- bool ascii_only = parser->current_regular_expression_ascii_only;
parser_lex(parser);
// If we hit an end, then we can create a regular expression
@@ -20376,26 +20224,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
- // If we're not immediately followed by a =~, then we want
- // to parse all of the errors at this point. If it is
- // followed by a =~, then it will get parsed higher up while
- // parsing the named captures as well.
+ // If we're not immediately followed by a =~, then we
+ // parse and validate now. If it is followed by a =~,
+ // then it will get parsed in the =~ handler where
+ // named captures can also be extracted.
if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
- parse_regular_expression_errors(parser, node);
+ pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
}
- pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
- return (pm_node_t *) node;
+ return UP(node);
}
// If we get here, then we have interpolation so we'll need to create
// a regular expression node with interpolation.
interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
-
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
// This is extremely strange, but the first string part of a
// regular expression will always be tagged as binary if we
@@ -20403,7 +20247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
}
- pm_interpolated_regular_expression_node_append(interpolated, part);
+ pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
} else {
// If the first part of the body of the regular expression is not a
// string content, then we have interpolation and we need to create an
@@ -20416,20 +20260,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *part;
while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_interpolated_regular_expression_node_append(interpolated, part);
+ pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
}
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
}
pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
- return (pm_node_t *) interpolated;
+ return UP(interpolated);
}
case PM_TOKEN_BACKTICK:
case PM_TOKEN_PERCENT_LOWER_X: {
@@ -20451,7 +20295,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
};
parser_lex(parser);
- return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
+ return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
}
pm_interpolated_x_string_node_t *node;
@@ -20466,7 +20310,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
if (match1(parser, PM_TOKEN_STRING_END)) {
- pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+ pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
pm_node_flag_set(node, parse_unescaped_encoding(parser));
parser_lex(parser);
return node;
@@ -20476,13 +20320,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// create a string node with interpolation.
node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
- pm_token_t opening = not_provided(parser);
- pm_token_t closing = not_provided(parser);
-
- pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
+ pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
pm_node_flag_set(part, parse_unescaped_encoding(parser));
- pm_interpolated_xstring_node_append(node, part);
+ pm_interpolated_xstring_node_append(parser->arena, node, part);
} else {
// If the first part of the body of the string is not a string
// content, then we have interpolation and we need to create an
@@ -20493,20 +20334,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *part;
while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
- pm_interpolated_xstring_node_append(node, part);
+ pm_interpolated_xstring_node_append(parser->arena, node, part);
}
}
pm_token_t closing = parser->current;
if (match1(parser, PM_TOKEN_EOF)) {
pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
- closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
} else {
expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
}
- pm_interpolated_xstring_node_closing_set(node, &closing);
+ pm_interpolated_xstring_node_closing_set(parser, node, &closing);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_USTAR: {
parser_lex(parser);
@@ -20516,17 +20357,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// still lex past it though and create a missing node place.
if (binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_prefix(parser, diag_id);
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
}
pm_token_t operator = parser->previous;
pm_node_t *name = NULL;
if (token_begins_expression_p(parser->current.type)) {
- name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+ name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
}
- pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
+ pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
if (match1(parser, PM_TOKEN_COMMA)) {
return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
@@ -20542,11 +20383,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_TILDE: {
if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20555,10 +20396,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UMINUS: {
if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20567,22 +20408,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_UMINUS_NUM: {
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
if (accept1(parser, PM_TOKEN_STAR_STAR)) {
pm_token_t exponent_operator = parser->previous;
- pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
- node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
- node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+ pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
+ node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
+ node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
} else {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE:
@@ -20592,7 +20433,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parse_negative_numeric(node);
break;
default:
- node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
+ node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
break;
}
}
@@ -20626,13 +20467,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
- pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
+ pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
break;
}
case PM_CASE_PARAMETER: {
pm_accepts_block_stack_push(parser, false);
- pm_token_t opening = not_provided(parser);
- block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
+ block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
break;
}
@@ -20650,39 +20490,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
opening = parser->previous;
if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
- body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
+ body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
}
parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
- expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
+ expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
} else {
expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
opening = parser->previous;
if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
- pm_accepts_block_stack_push(parser, true);
- body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
- pm_accepts_block_stack_pop(parser);
+ body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
}
if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
- body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
+ body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
} else {
parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
}
- expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
+ expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
}
pm_constant_id_list_t locals;
pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
- pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
+ pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
pm_parser_scope_pop(parser);
pm_accepts_block_stack_pop(parser);
- return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
+ return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
}
case PM_TOKEN_UPLUS: {
if (binding_power > PM_BINDING_POWER_UNARY) {
@@ -20691,13 +20529,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
parser_lex(parser);
pm_token_t operator = parser->previous;
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
- return (pm_node_t *) node;
+ return UP(node);
}
case PM_TOKEN_STRING_BEGIN:
- return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
+ return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
case PM_TOKEN_SYMBOL_BEGIN: {
pm_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
@@ -20720,17 +20558,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// If we get here, then we are assuming this token is closing a
// parent context, so we'll indicate that to the user so that
// they know how we behaved.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable));
} else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
// We're going to make a special case here, because "cannot
// parse expression" is pretty generic, and we know here that we
// have an unexpected token.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type));
} else {
pm_parser_err_prefix(parser, diag_id);
}
- return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
}
}
}
@@ -20745,8 +20583,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
* or any of the binary operators that can be written to a variable.
*/
static pm_node_t *
-parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
- pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
+parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
+ pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
+
+ // Assignments whose value is a command call (e.g., a = b c) can only
+ // be followed by modifiers (if/unless/while/until/rescue) and not by
+ // operators with higher binding power. If we find one, emit an error
+ // and skip the operator and its right-hand side.
+ if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ parser_lex(parser);
+ parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ }
// Contradicting binding powers, the right-hand-side value of the assignment
// allows the `rescue` modifier.
@@ -20756,10 +20604,10 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
pm_token_t rescue = parser->current;
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
context_pop(parser);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+ return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
}
return value;
@@ -20814,35 +20662,46 @@ parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
* operator that allows multiple values after it.
*/
static pm_node_t *
-parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
bool permitted = true;
if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
- pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
parse_assignment_value_local(parser, value);
bool single_value = true;
- if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
+ // Block calls (command call + do block, e.g., `foo bar do end`) cannot
+ // be followed by a comma to form a multi-value RHS because each element
+ // of a multi-value assignment must be an `arg`, not a `block_call`.
+ if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
single_value = false;
- pm_token_t opening = not_provided(parser);
- pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
- pm_array_node_elements_append(array, value);
- value = (pm_node_t *) array;
+ pm_array_node_t *array = pm_array_node_create(parser, NULL);
+ pm_array_node_elements_append(parser->arena, array, value);
+ value = UP(array);
while (accept1(parser, PM_TOKEN_COMMA)) {
pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
- pm_array_node_elements_append(array, element);
- if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
+ pm_array_node_elements_append(parser->arena, array, element);
+ if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
parse_assignment_value_local(parser, element);
}
}
+ // Assignments whose value is a command call (e.g., a = b c) can only
+ // be followed by modifiers (if/unless/while/until/rescue) and not by
+ // operators with higher binding power. If we find one, emit an error
+ // and skip the operator and its right-hand side.
+ if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+ parser_lex(parser);
+ parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ }
+
// Contradicting binding powers, the right-hand-side value of the assignment
// allows the `rescue` modifier.
if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
@@ -20857,15 +20716,15 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
// but without parenthesis.
if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
pm_call_node_t *call_node = (pm_call_node_t *) value;
- if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
+ if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
accepts_command_call_inner = true;
}
}
- pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
context_pop(parser);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
+ return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
}
return value;
@@ -20882,43 +20741,18 @@ static void
parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
if (call_node->arguments != NULL) {
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
- pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
+ pm_node_unreference(parser, UP(call_node->arguments));
call_node->arguments = NULL;
}
if (call_node->block != NULL) {
pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
- pm_node_destroy(parser, (pm_node_t *) call_node->block);
+ pm_node_unreference(parser, UP(call_node->block));
call_node->block = NULL;
}
}
-/**
- * This struct is used to pass information between the regular expression parser
- * and the named capture callback.
- */
-typedef struct {
- /** The parser that is parsing the regular expression. */
- pm_parser_t *parser;
-
- /** The call node wrapping the regular expression node. */
- pm_call_node_t *call;
-
- /** The match write node that is being created. */
- pm_match_write_node_t *match;
-
- /** The list of names that have been parsed. */
- pm_constant_id_list_t names;
-
- /**
- * Whether the content of the regular expression is shared. This impacts
- * whether or not we used owned constants or shared constants in the
- * constant pool for the names of the captures.
- */
- bool shared;
-} parse_regular_expression_named_capture_data_t;
-
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
cursor++;
@@ -20939,7 +20773,7 @@ pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const
return cursor;
}
-static inline const uint8_t *
+static PRISM_INLINE const uint8_t *
pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
uint8_t value = (uint8_t) (*cursor - '0');
cursor++;
@@ -20958,8 +20792,8 @@ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, con
return cursor;
}
-static inline const uint8_t *
-pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
+static PRISM_INLINE const uint8_t *
+pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
const uint8_t *start = cursor - 1;
cursor++;
@@ -20970,7 +20804,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
if (*cursor != '{') {
size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
- uint32_t value = escape_unicode(parser, cursor, length);
+ uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
@@ -20990,7 +20824,10 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
}
size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
- uint32_t value = escape_unicode(parser, cursor, length);
+ if (length == 0) {
+ break;
+ }
+ uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
(void) pm_buffer_append_unicode_codepoint(unescaped, value);
cursor += length;
@@ -21000,7 +20837,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con
}
static void
-pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) {
+pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
const uint8_t *end = source + length;
pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
@@ -21018,7 +20855,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
break;
case 'u':
- cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end);
+ cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
break;
default:
pm_buffer_append_byte(unescaped, '\\');
@@ -21040,10 +20877,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8
* capture group.
*/
static void
-parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
- parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data;
-
- pm_parser_t *parser = callback_data->parser;
+parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
pm_call_node_t *call = callback_data->call;
pm_constant_id_list_t *names = &callback_data->names;
@@ -21061,55 +20895,56 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
// unescaped, which is what we need.
const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
if (PRISM_UNLIKELY(cursor != NULL)) {
- pm_named_capture_escape(parser, &unescaped, source, length, cursor);
+ pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
source = (const uint8_t *) pm_buffer_value(&unescaped);
length = pm_buffer_length(&unescaped);
}
- pm_location_t location;
+ const uint8_t *start;
+ const uint8_t *end;
pm_constant_id_t name;
// If the name of the capture group isn't a valid identifier, we do
// not add it to the local table.
if (!pm_slice_is_valid_local(parser, source, source + length)) {
- pm_buffer_free(&unescaped);
+ pm_buffer_cleanup(&unescaped);
return;
}
- if (callback_data->shared) {
+ if (shared) {
// If the unescaped string is a slice of the source, then we can
// copy the names directly. The pointers will line up.
- location = (pm_location_t) { .start = source, .end = source + length };
- name = pm_parser_constant_id_location(parser, location.start, location.end);
+ start = source;
+ end = source + length;
+ name = pm_parser_constant_id_raw(parser, start, end);
} else {
// Otherwise, the name is a slice of the malloc-ed owned string,
// in which case we need to copy it out into a new string.
- location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
-
- void *memory = xmalloc(length);
- if (memory == NULL) abort();
+ start = parser->start + PM_NODE_START(call->receiver);
+ end = parser->start + PM_NODE_END(call->receiver);
+ uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
memcpy(memory, source, length);
- name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
+ name = pm_parser_constant_id_owned(parser, memory, length);
}
// Add this name to the list of constants if it is valid, not duplicated,
// and not a keyword.
if (name != 0 && !pm_constant_id_list_includes(names, name)) {
- pm_constant_id_list_append(names, name);
+ pm_constant_id_list_append(parser->arena, names, name);
int depth;
if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
// If the local is not already a local but it is a keyword, then we
// do not want to add a capture for this.
if (pm_local_is_keyword((const char *) source, length)) {
- pm_buffer_free(&unescaped);
+ pm_buffer_cleanup(&unescaped);
return;
}
// If the identifier is not already a local, then we will add it to
// the local table.
- pm_parser_local_add(parser, name, location.start, location.end, 0);
+ pm_parser_local_add(parser, name, start, end, 0);
}
// Here we lazily create the MatchWriteNode since we know we're
@@ -21120,45 +20955,37 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
// Next, create the local variable target and add it to the list of
// targets for the match.
- pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
- pm_node_list_append(&callback_data->match->targets, target);
+ pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
+ pm_node_list_append(parser->arena, &callback_data->match->targets, target);
}
- pm_buffer_free(&unescaped);
+ pm_buffer_cleanup(&unescaped);
}
/**
- * Potentially change a =~ with a regular expression with named captures into a
- * match write node.
+ * Potentially change a =~ with an interpolated regular expression with named
+ * captures into a match write node. This is for the interpolated case where
+ * we have concatenated content rather than a regular expression node.
*/
static pm_node_t *
-parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
- parse_regular_expression_named_capture_data_t callback_data = {
- .parser = parser,
+parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
+ pm_regexp_name_data_t callback_data = {
.call = call,
+ .match = NULL,
.names = { 0 },
- .shared = content->type == PM_STRING_SHARED
};
- parse_regular_expression_error_data_t error_data = {
- .parser = parser,
- .start = call->receiver->location.start,
- .end = call->receiver->location.end,
- .shared = content->type == PM_STRING_SHARED
- };
-
- pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
- pm_constant_id_list_free(&callback_data.names);
+ pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
if (callback_data.match != NULL) {
- return (pm_node_t *) callback_data.match;
+ return UP(callback_data.match);
} else {
- return (pm_node_t *) call;
+ return UP(call);
}
}
-static inline pm_node_t *
-parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
+static PRISM_INLINE pm_node_t *
+parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
pm_token_t token = parser->current;
switch (token.type) {
@@ -21171,13 +20998,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// is parsed because it could be referenced in the value.
pm_call_node_t *call_node = (pm_call_node_t *) node;
if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
+ pm_parser_local_add_location(parser, &call_node->message_loc, 0);
}
}
PRISM_FALLTHROUGH
case PM_CASE_WRITABLE: {
+ // When we have `it = value`, we need to add `it` as a local
+ // variable before parsing the value, in case the value
+ // references the variable.
+ if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
+ pm_parser_local_add_location(parser, &node->location, 0);
+ }
+
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
@@ -21190,8 +21024,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_multi_target_node_targets_append(parser, multi_target, node);
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
- return parse_write(parser, (pm_node_t *) multi_target, &token, value);
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ return parse_write(parser, UP(multi_target), &token, value);
}
case PM_SOURCE_ENCODING_NODE:
case PM_FALSE_NODE:
@@ -21203,7 +21037,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// In these special cases, we have specific error messages
// and we will replace them with local variable writes.
parser_lex(parser);
- pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
return parse_unwriteable_write(parser, node, &token, value);
}
default:
@@ -21224,71 +21058,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
- parse_target_implicit_parameter(parser, node);
- pm_node_destroy(parser, node);
+ pm_node_unreference(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
+ pm_node_unreference(parser, node);
}
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
- pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
@@ -21298,16 +21126,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_location_t *message_loc = &cast->message_loc;
- pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
-
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_node_destroy(parser, (pm_node_t *) cast);
return result;
}
@@ -21319,8 +21144,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// this is an aref expression, and we can transform it into
// an aset expression.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ return UP(pm_index_and_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
@@ -21331,8 +21156,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
+ return UP(pm_call_and_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -21358,71 +21183,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
- parse_target_implicit_parameter(parser, node);
- pm_node_destroy(parser, node);
+ pm_node_unreference(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
+ pm_node_unreference(parser, node);
}
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
- pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
@@ -21432,16 +21251,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_location_t *message_loc = &cast->message_loc;
- pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
-
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
+ pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_node_destroy(parser, (pm_node_t *) cast);
return result;
}
@@ -21453,8 +21269,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// this is an aref expression, and we can transform it into
// an aset expression.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ return UP(pm_index_or_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
@@ -21465,8 +21281,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
+ return UP(pm_call_or_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -21502,71 +21318,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_GLOBAL_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CLASS_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_CONSTANT_PATH_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
return parse_shareable_constant_write(parser, write);
}
case PM_CONSTANT_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return parse_shareable_constant_write(parser, write);
}
case PM_INSTANCE_VARIABLE_READ_NODE: {
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
- pm_node_destroy(parser, node);
return result;
}
case PM_IT_LOCAL_VARIABLE_READ_NODE: {
pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
- parse_target_implicit_parameter(parser, node);
- pm_node_destroy(parser, node);
+ pm_node_unreference(parser, node);
return result;
}
case PM_LOCAL_VARIABLE_READ_NODE: {
- if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
- PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start);
- parse_target_implicit_parameter(parser, node);
+ if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
+ PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
+ pm_node_unreference(parser, node);
}
pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node;
parser_lex(parser);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
- pm_node_destroy(parser, node);
return result;
}
case PM_CALL_NODE: {
@@ -21577,14 +21387,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// receiver that could have been a local variable) then we
// will transform it into a local variable write.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
- pm_location_t *message_loc = &cast->message_loc;
- pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
+ pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
+ pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
- pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
-
- pm_node_destroy(parser, (pm_node_t *) cast);
return result;
}
@@ -21592,8 +21399,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// this is an aref expression, and we can transform it into
// an aset expression.
if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
}
// If this node cannot be writable, then we have an error.
@@ -21604,8 +21411,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
}
parse_call_operator_write(parser, cast, &token);
- pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
+ pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
}
case PM_MULTI_WRITE_NODE: {
parser_lex(parser);
@@ -21618,7 +21425,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// In this case we have an operator but we don't know what it's for.
// We need to treat it as an error. For now, we'll mark it as an error
// and just skip right past it.
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type));
return node;
}
}
@@ -21626,15 +21433,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_TOKEN_KEYWORD_AND: {
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
+ pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_and_node_create(parser, node, &token, right));
}
case PM_TOKEN_KEYWORD_OR:
case PM_TOKEN_PIPE_PIPE: {
parser_lex(parser);
- pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
+ pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_or_node_create(parser, node, &token, right));
}
case PM_TOKEN_EQUAL_TILDE: {
// Note that we _must_ parse the value before adding the local
@@ -21645,11 +21452,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
//
// In this case, `foo` should be a method call and not a local yet.
parser_lex(parser);
- pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// By default, we're going to create a call node and then return it.
pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
- pm_node_t *result = (pm_node_t *) call;
+ pm_node_t *result = UP(call);
// If the receiver of this =~ is a regular expression node, then we
// need to introduce local variables for it based on its named
@@ -21690,14 +21497,25 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_string_t owned;
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
- result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
- pm_string_free(&owned);
+ result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+ pm_string_cleanup(&owned);
}
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
- // If we have a regular expression node, then we can just parse
- // the named captures directly off the unescaped string.
- const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
- result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
+ // If we have a regular expression node, then we can parse
+ // the named captures and validate encoding in one pass.
+ pm_regular_expression_node_t *regexp = (pm_regular_expression_node_t *) node;
+
+ pm_regexp_name_data_t name_data = {
+ .call = call,
+ .match = NULL,
+ .names = { 0 },
+ };
+
+ pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
+
+ if (name_data.match != NULL) {
+ result = UP(name_data.match);
+ }
}
return result;
@@ -21729,21 +21547,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
case PM_RESCUE_MODIFIER_NODE: {
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_AND_NODE: {
pm_and_node_t *cast = (pm_and_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_OR_NODE: {
pm_or_node_t *cast = (pm_or_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
@@ -21751,20 +21569,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
break;
}
- pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
+ pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
}
case PM_TOKEN_GREATER:
case PM_TOKEN_GREATER_EQUAL:
case PM_TOKEN_LESS:
case PM_TOKEN_LESS_EQUAL: {
if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
- PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
+ PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
}
parser_lex(parser);
- pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
+ pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
}
case PM_TOKEN_AMPERSAND_DOT:
case PM_TOKEN_DOT: {
@@ -21775,28 +21593,28 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// This if statement handles the foo.() syntax.
if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
+ return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
}
switch (PM_NODE_TYPE(node)) {
case PM_RESCUE_MODIFIER_NODE: {
pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_AND_NODE: {
pm_and_node_t *cast = (pm_and_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
case PM_OR_NODE: {
pm_or_node_t *cast = (pm_or_node_t *) node;
if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
}
break;
}
@@ -21817,23 +21635,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
break;
}
default: {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
- message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type));
+ message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
}
}
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
if (
(previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
arguments.arguments == NULL &&
- arguments.opening_loc.start == NULL &&
+ arguments.opening_loc.length == 0 &&
match1(parser, PM_TOKEN_COMMA)
) {
- return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
} else {
- return (pm_node_t *) call;
+ return UP(call);
}
}
case PM_TOKEN_DOT_DOT:
@@ -21842,40 +21660,40 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *right = NULL;
if (token_begins_expression_p(parser->current.type)) {
- right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
+ right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
}
- return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
+ return UP(pm_range_node_create(parser, node, &token, right));
}
case PM_TOKEN_KEYWORD_IF_MODIFIER: {
pm_token_t keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
}
case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
pm_token_t keyword = parser->current;
parser_lex(parser);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
}
case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
parser_lex(parser);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, node, true);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
}
case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
parser_lex(parser);
pm_statements_node_t *statements = pm_statements_node_create(parser);
pm_statements_node_body_append(parser, statements, node, true);
- pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
+ pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
+ return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
}
case PM_TOKEN_QUESTION_MARK: {
context_push(parser, PM_CONTEXT_TERNARY);
@@ -21885,7 +21703,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t qmark = parser->current;
parser_lex(parser);
- pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
+ pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
if (parser->recovering) {
// If parsing the true expression of this ternary resulted in a syntax
@@ -21894,27 +21712,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// before the `expect` function call to make sure it doesn't
// accidentally move past a ':' token that occurs after the syntax
// error.
- pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
- pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
+ pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+ pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
context_pop(parser);
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+ return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
}
accept1(parser, PM_TOKEN_NEWLINE);
expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
pm_token_t colon = parser->previous;
- pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
+ pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
context_pop(parser);
pop_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
-
- return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
+ return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
}
case PM_TOKEN_COLON_COLON: {
parser_lex(parser);
@@ -21927,7 +21741,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (
(parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
- (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
+ ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
) {
// If we have a constant immediately following a '::' operator, then
// this can either be a constant path or a method call, depending on
@@ -21938,11 +21752,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_token_t message = parser->previous;
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
- path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
+ path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
} else {
// Otherwise, this is a constant path. That would look like Foo::Bar.
- path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+ path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
// If this is followed by a comma then it is a multiple assignment.
@@ -21962,15 +21776,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
// If we have an identifier following a '::' operator, then it is for
// sure a method call.
pm_arguments_t arguments = { 0 };
- parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
+ parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
// If this is followed by a comma then it is a multiple assignment.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
- return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
- return (pm_node_t *) call;
+ return UP(call);
}
case PM_TOKEN_PARENTHESIS_LEFT: {
// If we have a parenthesis following a '::' operator, then it is the
@@ -21978,11 +21792,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_arguments_t arguments = { 0 };
parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
- return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
+ return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
}
default: {
expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
- return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
+ return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
}
}
}
@@ -21991,31 +21805,31 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
parser_lex(parser);
accept1(parser, PM_TOKEN_NEWLINE);
- pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+ pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
context_pop(parser);
- return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
+ return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
}
case PM_TOKEN_BRACKET_LEFT: {
parser_lex(parser);
pm_arguments_t arguments = { 0 };
- arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments.opening_loc = TOK2LOC(parser, &parser->previous);
if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
pm_accepts_block_stack_push(parser, true);
- parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
+ parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
pm_accepts_block_stack_pop(parser);
expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
}
- arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
+ arguments.closing_loc = TOK2LOC(parser, &parser->previous);
// If we have a comma after the closing bracket then this is a multiple
// assignment and we should parse the targets.
if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
- return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+ return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
}
// If we're at the end of the arguments, we can now check if there is a
@@ -22031,17 +21845,17 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
if (block != NULL) {
if (arguments.block != NULL) {
- pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
+ pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
if (arguments.arguments == NULL) {
arguments.arguments = pm_arguments_node_create(parser);
}
- pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
+ pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
}
- arguments.block = (pm_node_t *) block;
+ arguments.block = UP(block);
}
- return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
+ return UP(pm_call_node_aref_create(parser, node, &arguments));
}
case PM_TOKEN_KEYWORD_IN: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -22056,9 +21870,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- pm_constant_id_list_free(&captures);
- return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
+ return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
}
case PM_TOKEN_EQUAL_GREATER: {
bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
@@ -22073,9 +21886,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
parser->pattern_matching_newlines = previous_pattern_matching_newlines;
- pm_constant_id_list_free(&captures);
- return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
+ return UP(pm_match_required_node_create(parser, node, pattern, &operator));
}
default:
assert(false && "unreachable");
@@ -22088,16 +21900,83 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
#undef PM_PARSE_PATTERN_MULTI
/**
- * Determine if a given call node looks like a "command", which means it has
- * arguments but does not have parentheses.
+ * Some nodes act as statements and limit which operators can follow. This
+ * function inspects the node and the upcoming token to determine whether the
+ * expression loop should stop. It is called both after prefix parsing and after
+ * each infix operator.
+ *
+ * This comment also describes a side effect: attaching do-blocks to command-style
+ * call nodes. NOTE(review): no such step is visible in the body below; confirm.
+ *
+ * Returns true if the expression loop should stop (i.e., the next operator
+ * should not be consumed).
*/
-static inline bool
-pm_call_node_command_p(const pm_call_node_t *node) {
- return (
- (node->opening_loc.start == NULL) &&
- (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
- (node->arguments != NULL || node->block != NULL)
- );
+static bool
+parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
+ pm_binding_power_t left = pm_binding_powers[parser->current.type].left; // left binding power looked up by the current token's type
+
+ switch (PM_NODE_TYPE(node)) {
+ case PM_MULTI_WRITE_NODE:
+ case PM_RETURN_NODE:
+ case PM_BREAK_NODE:
+ case PM_NEXT_NODE:
+ return left > PM_BINDING_POWER_MODIFIER; // stop when the upcoming operator binds tighter than a modifier
+ case PM_CLASS_VARIABLE_WRITE_NODE:
+ case PM_CONSTANT_PATH_WRITE_NODE:
+ case PM_CONSTANT_WRITE_NODE:
+ case PM_GLOBAL_VARIABLE_WRITE_NODE:
+ case PM_INSTANCE_VARIABLE_WRITE_NODE:
+ case PM_LOCAL_VARIABLE_WRITE_NODE:
+ return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER; // same threshold, but only for writes carrying the IMPLICIT_ARRAY flag
+ case PM_CALL_NODE: {
+ // Calls with an implicit array on the right-hand side are
+ // statements and can only be followed by modifiers.
+ if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
+ return left > PM_BINDING_POWER_MODIFIER;
+ }
+
+ // Command-style calls (including block commands like
+ // `foo bar do end`) can only be followed by composition
+ // (and/or) and modifier (if/unless/etc.) operators.
+ if (pm_command_call_value_p(node)) {
+ return left > PM_BINDING_POWER_COMPOSITION;
+ }
+
+ // A block call (command with do-block, or any call chained
+ // from one) can only be followed by call chaining (., ::,
+ // &.), composition (and/or), and modifier operators.
+ if (pm_block_call_p(node)) {
+ return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL; // stop only for powers strictly between composition and call
+ }
+
+ return false; // none of the three checks matched: do not stop
+ }
+ case PM_SUPER_NODE:
+ case PM_YIELD_NODE:
+ // Command-style super/yield (without parens) can only be followed
+ // by composition and modifier operators.
+ if (pm_command_call_value_p(node)) {
+ return left > PM_BINDING_POWER_COMPOSITION;
+ }
+ return false;
+ case PM_DEF_NODE:
+ // An endless method whose body is a command-style call (e.g.,
+ // `def f = foo bar`) is a command assignment and can only be
+ // followed by modifiers.
+ return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node); // the power comparison runs first; the helper is only called when it holds
+ case PM_RESCUE_MODIFIER_NODE:
+ // A rescue modifier whose handler is a pattern match (=> or in)
+ // produces a statement and cannot be followed by operators above
+ // the modifier level.
+ if (left > PM_BINDING_POWER_MODIFIER) {
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
+ pm_node_t *rescue_expression = cast->rescue_expression;
+ return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
+ }
+ return false;
+ default:
+ return false; // every other node type: never stop
+ }
}
/**
@@ -22109,46 +21988,40 @@ pm_call_node_command_p(const pm_call_node_t *node) {
* determine if they need to perform additional cleanup.
*/
static pm_node_t *
-parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
+parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
- return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
+ return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
}
- pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
+ pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
+ // Some prefix nodes are statements and can only be followed by modifiers
+ // (if/unless/while/until/rescue) or nothing at all. We check these cheaply
+ // here before entering the infix loop.
switch (PM_NODE_TYPE(node)) {
- case PM_MISSING_NODE:
- // If we found a syntax error, then the type of node returned by
- // parse_expression_prefix is going to be a missing node.
+ case PM_ERROR_RECOVERY_NODE:
return node;
case PM_PRE_EXECUTION_NODE:
+ return node;
case PM_POST_EXECUTION_NODE:
case PM_ALIAS_GLOBAL_VARIABLE_NODE:
case PM_ALIAS_METHOD_NODE:
- case PM_MULTI_WRITE_NODE:
case PM_UNDEF_NODE:
- // These expressions are statements, and cannot be followed by
- // operators (except modifiers).
if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
return node;
}
break;
case PM_CALL_NODE:
- // If we have a call node, then we need to check if it looks like a
- // method call without parentheses that contains arguments. If it
- // does, then it has different rules for parsing infix operators,
- // namely that it only accepts composition (and/or) and modifiers
- // (if/unless/etc.).
- if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
+ case PM_SUPER_NODE:
+ case PM_YIELD_NODE:
+ case PM_DEF_NODE:
+ if (parse_expression_terminator(parser, node)) {
return node;
}
break;
case PM_SYMBOL_NODE:
- // If we have a symbol node that is being parsed as a label, then we
- // need to immediately return, because there should never be an
- // infix operator following this node.
- if (pm_symbol_node_label_p(node)) {
+ if (pm_symbol_node_label_p(parser, node)) {
return node;
}
break;
@@ -22156,8 +22029,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
break;
}
- // Otherwise we'll look and see if the next token can be parsed as an infix
- // operator. If it can, then we'll parse it using parse_expression_infix.
+ // Look and see if the next token can be parsed as an infix operator. If it
+ // can, then we'll parse it using parse_expression_infix.
pm_binding_powers_t current_binding_powers;
pm_token_type_t current_token_type;
@@ -22167,39 +22040,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
binding_power <= current_binding_powers.left &&
current_binding_powers.binary
) {
- node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
-
- switch (PM_NODE_TYPE(node)) {
- case PM_MULTI_WRITE_NODE:
- // Multi-write nodes are statements, and cannot be followed by
- // operators except modifiers.
- if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
- return node;
- }
- break;
- case PM_CLASS_VARIABLE_WRITE_NODE:
- case PM_CONSTANT_PATH_WRITE_NODE:
- case PM_CONSTANT_WRITE_NODE:
- case PM_GLOBAL_VARIABLE_WRITE_NODE:
- case PM_INSTANCE_VARIABLE_WRITE_NODE:
- case PM_LOCAL_VARIABLE_WRITE_NODE:
- // These expressions are statements, by virtue of the right-hand
- // side of their write being an implicit array.
- if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
- return node;
- }
- break;
- case PM_CALL_NODE:
- // These expressions are also statements, by virtue of the
- // right-hand side of the expression (i.e., the last argument to
- // the call node) being an implicit array.
- if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
- return node;
- }
- break;
- default:
- break;
- }
+ node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
+ if (parse_expression_terminator(parser, node)) return node;
// If the operator is nonassoc and we should not be able to parse the
// upcoming infix operator, break.
@@ -22207,7 +22049,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
// If this is a non-assoc operator and we are about to parse the
// exact same operator, then we need to add an error.
if (match1(parser, current_token_type)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
break;
}
@@ -22220,7 +22062,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
//
if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
break;
}
@@ -22232,7 +22074,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
}
}
- if (accepts_command_call) {
+ if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
// A command-style method call is only accepted on method chains.
// Thus, we check whether the parsed node can continue method chains.
// The method chain can continue if the parsed node is one of the following five kinds:
@@ -22247,29 +22089,29 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
if (
// (1) foo[1]
!(
- cast->call_operator_loc.start == NULL &&
- cast->message_loc.start != NULL &&
- cast->message_loc.start[0] == '[' &&
- cast->message_loc.end[-1] == ']'
+ cast->call_operator_loc.length == 0 &&
+ cast->message_loc.length > 0 &&
+ parser->start[cast->message_loc.start] == '[' &&
+ parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
) &&
// (2) foo.bar
!(
- cast->call_operator_loc.start != NULL &&
+ cast->call_operator_loc.length > 0 &&
cast->arguments == NULL &&
cast->block == NULL &&
- cast->opening_loc.start == NULL
+ cast->opening_loc.length == 0
) &&
// (3) foo.bar(1)
!(
- cast->call_operator_loc.start != NULL &&
- cast->opening_loc.start != NULL
+ cast->call_operator_loc.length > 0 &&
+ cast->opening_loc.length > 0
) &&
// (4) foo.bar do end
!(
cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
)
) {
- accepts_command_call = false;
+ flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
}
break;
}
@@ -22277,10 +22119,21 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
case PM_CONSTANT_PATH_NODE:
break;
default:
- accepts_command_call = false;
+ flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
break;
}
}
+
+ if (context_terminator(parser->current_context->context, &parser->current)) {
+ pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
+ if (
+ !next_binding_powers.binary ||
+ binding_power > next_binding_powers.left ||
+ (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
+ ) {
+ return node;
+ }
+ }
}
return node;
@@ -22299,15 +22152,16 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
pm_arguments_node_arguments_append(
+ parser->arena,
arguments,
- (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
);
- pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
+ pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
parser,
arguments,
pm_parser_constant_id_constant(parser, "print", 5)
- ), true);
+ )), true);
}
if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
@@ -22318,47 +22172,49 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
pm_arguments_node_arguments_append(
+ parser->arena,
arguments,
- (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
);
pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
- pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
+ pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
parser,
pm_parser_constant_id_constant(parser, "$F", 2),
- (pm_node_t *) call
+ UP(call)
);
- pm_statements_node_body_prepend(statements, (pm_node_t *) write);
+ pm_statements_node_body_prepend(parser->arena, statements, UP(write));
}
pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
pm_arguments_node_arguments_append(
+ parser->arena,
arguments,
- (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
+ UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
);
if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
- pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
+ pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
parser,
- (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
- &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
- (pm_node_t *) pm_true_node_synthesized_create(parser)
- ));
+ UP(pm_symbol_node_synthesized_create(parser, "chomp")),
+ NULL,
+ UP(pm_true_node_synthesized_create(parser))
+ )));
- pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
- pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
+ pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
+ pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
}
pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
- pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
+ pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
parser,
- (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
+ UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
statements
- ), true);
+ )), true);
statements = wrapped_statements;
}
@@ -22402,7 +22258,6 @@ parse_program(pm_parser_t *parser) {
statements = wrap_statements(parser, statements);
} else {
flush_block_exits(parser, previous_block_exits);
- pm_node_list_free(&current_block_exits);
}
// If this is an empty file, then we're still going to parse all of the
@@ -22410,10 +22265,10 @@ parse_program(pm_parser_t *parser) {
// correct the location information.
if (statements == NULL) {
statements = pm_statements_node_create(parser);
- pm_statements_node_location_set(statements, parser->start, parser->start);
+ statements->base.location = (pm_location_t) { 0 };
}
- return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
+ return UP(pm_program_node_create(parser, &locals, statements));
}
/******************************************************************************/
@@ -22422,8 +22277,8 @@ parse_program(pm_parser_t *parser) {
/**
* A vendored version of strnstr that is used to find a substring within a
- * string with a given length. This function is used to search for the Ruby
- * engine name within a shebang when the -x option is passed to Ruby.
+ * string with a given length. This function is used to search for "ruby"
+ * within a shebang when the -x option is passed to Ruby.
*
* The only modification that we made here is that we don't do NULL byte checks
* because we know the little parameter will not have a NULL byte and we allow
@@ -22433,7 +22288,7 @@ static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
size_t little_length = strlen(little);
- for (const char *big_end = big + big_length; big < big_end; big++) {
+ for (const char *max = big + big_length - little_length; big <= max; big++) {
if (*big == *little && memcmp(big, little, little_length) == 0) return big;
}
@@ -22451,7 +22306,7 @@ pm_strnstr(const char *big, const char *little, size_t big_length) {
static void
pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
- pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
+ pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
}
}
#endif
@@ -22486,11 +22341,14 @@ pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const c
/**
* Initialize a parser with the given start and end pointers.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+void
+pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
+ assert(arena != NULL);
assert(source != NULL);
*parser = (pm_parser_t) {
+ .arena = arena,
+ .metadata_arena = { 0 },
.node_id = 0,
.lex_state = PM_LEX_STATE_BEG,
.enclosure_nesting = 0,
@@ -22509,7 +22367,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
.next_start = NULL,
.heredoc_end = NULL,
- .data_loc = { .start = NULL, .end = NULL },
+ .data_loc = { 0 },
.comment_list = { 0 },
.magic_comment_list = { 0 },
.warning_list = { 0 },
@@ -22519,11 +22377,11 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.encoding = PM_ENCODING_UTF_8_ENTRY,
.encoding_changed_callback = NULL,
.encoding_comment_start = source,
- .lex_callback = NULL,
+ .lex_callback = { 0 },
.filepath = { 0 },
.constant_pool = { 0 },
- .newline_list = { 0 },
- .integer_base = 0,
+ .line_offsets = { 0 },
+ .integer = { 0 },
.current_string = PM_STRING_EMPTY,
.start_line = 1,
.explicit_encoding = NULL,
@@ -22532,6 +22390,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.partial_script = false,
.command_start = true,
.recovering = false,
+ .continuable = true,
.encoding_locked = false,
.encoding_changed = false,
.pattern_matching_newlines = false,
@@ -22539,32 +22398,30 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.current_block_exits = NULL,
.semantic_token_seen = false,
.frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
- .current_regular_expression_ascii_only = false,
.warn_mismatched_indentation = true
};
- // Initialize the constant pool. We're going to completely guess as to the
- // number of constants that we'll need based on the size of the input. The
- // ratio we chose here is actually less arbitrary than you might think.
- //
- // We took ~50K Ruby files and measured the size of the file versus the
- // number of constants that were found in those files. Then we found the
- // average and standard deviation of the ratios of constants/bytesize. Then
- // we added 1.34 standard deviations to the average to get a ratio that
- // would fit 75% of the files (for a two-tailed distribution). This works
- // because there was about a 0.77 correlation and the distribution was
- // roughly normal.
- //
- // This ratio will need to change if we add more constants to the constant
- // pool for another node type.
- uint32_t constant_size = ((uint32_t) size) / 95;
- pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
-
- // Initialize the newline list. Similar to the constant pool, we're going to
- // guess at the number of newlines that we'll need based on the size of the
- // input.
+ /* Pre-size the arenas based on input size to reduce the number of block
+ * allocations (and the kernel page zeroing they trigger). The ratios were
+ * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
+ * The reserve call is a no-op when the capacity is at or below the default
+ * arena block size, so small inputs don't waste an extra allocation. */
+ if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
+ if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
+
+ /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
+ * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
+ * We use 120 as a balance between over-allocation waste and resize
+ * frequency. Resizes are cheap with arena allocation, so we lean toward
+ * under-estimating. */
+ uint32_t constant_size = ((uint32_t) size) / 120;
+ pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
+
+ /* Initialize the line offset list. Similar to the constant pool, we are
+ * going to estimate the number of newlines that we will need based on the
+ * size of the input. */
size_t newline_size = size / 22;
- pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
+ pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
// If options were provided to this parse, establish them here.
if (options != NULL) {
@@ -22601,7 +22458,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
- const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
+ const pm_options_scope_t *scope = pm_options_scope(options, scope_index);
pm_parser_scope_push(parser, scope_index == 0);
// Scopes given from the outside are not allowed to have numbered
@@ -22609,20 +22466,24 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
- const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
+ const pm_string_t *local = pm_options_scope_local(scope, local_index);
const uint8_t *source = pm_string_source(local);
size_t length = pm_string_length(local);
- void *allocated = xmalloc(length);
- if (allocated == NULL) continue;
-
+ uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
memcpy(allocated, source, length);
- pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
+ pm_parser_local_add_owned(parser, allocated, length);
}
}
}
+ // Now that we have established the user-provided options, check if
+ // a version was given and parse as the latest version otherwise.
+ if (parser->version == PM_OPTIONS_VERSION_UNSET) {
+ parser->version = PM_OPTIONS_VERSION_LATEST;
+ }
+
pm_accepts_block_stack_push(parser, true);
// Skip past the UTF-8 BOM if it exists.
@@ -22656,8 +22517,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
// If the shebang does not include "ruby" and this is the main script being
// parsed, then we will start searching the file for a shebang that does
// contain "ruby" as if -x were passed on the command line.
- const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
- size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
+ const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
+ size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
const char *engine;
@@ -22676,7 +22537,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
}
search_shebang = false;
- } else if (options->main_script && !parser->parsing_eval) {
+ } else if (options != NULL && options->main_script && !parser->parsing_eval) {
search_shebang = true;
}
}
@@ -22697,7 +22558,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
const uint8_t *newline = next_newline(cursor, parser->end - cursor);
while (newline != NULL) {
- pm_newline_list_append(&parser->newline_list, newline);
+ pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
cursor = newline + 1;
newline = next_newline(cursor, parser->end - cursor);
@@ -22726,8 +22587,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
} else {
- pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
- pm_newline_list_clear(&parser->newline_list);
+ pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
+ pm_line_offset_list_clear(&parser->line_offsets);
}
}
@@ -22738,56 +22599,28 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
}
/**
- * Register a callback that will be called whenever prism changes the encoding
- * it is using to parse based on the magic comment.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
- parser->encoding_changed_callback = callback;
-}
-
-/**
- * Free all of the memory associated with the comment list.
- */
-static inline void
-pm_comment_list_free(pm_list_t *list) {
- pm_list_node_t *node, *next;
-
- for (node = list->head; node != NULL; node = next) {
- next = node->next;
-
- pm_comment_t *comment = (pm_comment_t *) node;
- xfree(comment);
- }
-}
-
-/**
- * Free all of the memory associated with the magic comment list.
+ * Allocate and initialize a parser with the given source buffer and size.
+ *
+ * The resulting parser must eventually be freed with `pm_parser_free()`. The
+ * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()`
+ * does not free the arena.
*/
-static inline void
-pm_magic_comment_list_free(pm_list_t *list) {
- pm_list_node_t *node, *next;
-
- for (node = list->head; node != NULL; node = next) {
- next = node->next;
+pm_parser_t *
+pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) {
+ pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t));
+ if (parser == NULL) abort();
- pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
- xfree(magic_comment);
- }
+ pm_parser_init(arena, parser, source, size, options);
+ return parser;
}
/**
* Free any memory associated with the given parser.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_parser_free(pm_parser_t *parser) {
- pm_string_free(&parser->filepath);
- pm_diagnostic_list_free(&parser->error_list);
- pm_diagnostic_list_free(&parser->warning_list);
- pm_comment_list_free(&parser->comment_list);
- pm_magic_comment_list_free(&parser->magic_comment_list);
- pm_constant_pool_free(&parser->constant_pool);
- pm_newline_list_free(&parser->newline_list);
+void
+pm_parser_cleanup(pm_parser_t *parser) {
+ pm_string_cleanup(&parser->filepath);
+ pm_arena_cleanup(&parser->metadata_arena);
while (parser->current_scope != NULL) {
// Normally, popping the scope doesn't free the locals since it is
@@ -22803,145 +22636,224 @@ pm_parser_free(pm_parser_t *parser) {
}
/**
- * Parse the Ruby source associated with the given parser and return the tree.
+ * Free both the memory held by the given parser and the parser itself.
*/
-PRISM_EXPORTED_FUNCTION pm_node_t *
-pm_parse(pm_parser_t *parser) {
- return parse_program(parser);
+void
+pm_parser_free(pm_parser_t *parser) {
+ pm_parser_cleanup(parser);
+ xfree_sized(parser, sizeof(pm_parser_t));
}
/**
- * Read into the stream until the gets callback returns false. If the last read
- * line from the stream matches an __END__ marker, then halt and return false,
- * otherwise return true.
+ * Returns true if the given diagnostic ID represents an error that cannot be
+ * fixed by appending more input. These are errors where the existing source
+ * contains definitively invalid syntax (as opposed to merely incomplete input).
*/
static bool
-pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) {
-#define LINE_SIZE 4096
- char line[LINE_SIZE];
-
- while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
- size_t length = LINE_SIZE;
- while (length > 0 && line[length - 1] == '\n') length--;
-
- if (length == LINE_SIZE) {
- // If we read a line that is the maximum size and it doesn't end
- // with a newline, then we'll just append it to the buffer and
- // continue reading.
- length--;
- pm_buffer_append_string(buffer, line, length);
- continue;
- }
-
- // Append the line to the buffer.
- length--;
- pm_buffer_append_string(buffer, line, length);
-
- // Check if the line matches the __END__ marker. If it does, then stop
- // reading and return false. In most circumstances, this means we should
- // stop reading from the stream so that the DATA constant can pick it
- // up.
- switch (length) {
- case 7:
- if (strncmp(line, "__END__", 7) == 0) return false;
- break;
- case 8:
- if (strncmp(line, "__END__\n", 8) == 0) return false;
- break;
- case 9:
- if (strncmp(line, "__END__\r\n", 9) == 0) return false;
- break;
- }
+pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
+ switch (diag_id) {
+ case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
+ case PM_ERR_BEGIN_UPCASE_BRACE:
+ case PM_ERR_CLASS_VARIABLE_BARE:
+ case PM_ERR_END_UPCASE_BRACE:
+ case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
+ case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
+ case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
+ case PM_ERR_EXPRESSION_NOT_WRITABLE:
+ case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
+ case PM_ERR_FLOAT_PARSE:
+ case PM_ERR_GLOBAL_VARIABLE_BARE:
+ case PM_ERR_HASH_KEY:
+ case PM_ERR_HEREDOC_IDENTIFIER:
+ case PM_ERR_INSTANCE_VARIABLE_BARE:
+ case PM_ERR_INVALID_BLOCK_EXIT:
+ case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
+ case PM_ERR_INVALID_FLOAT_EXPONENT:
+ case PM_ERR_INVALID_NUMBER_BINARY:
+ case PM_ERR_INVALID_NUMBER_DECIMAL:
+ case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
+ case PM_ERR_INVALID_NUMBER_OCTAL:
+ case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
+ case PM_ERR_NO_LOCAL_VARIABLE:
+ case PM_ERR_PARAMETER_ORDER:
+ case PM_ERR_STATEMENT_UNDEF:
+ case PM_ERR_VOID_EXPRESSION:
+ return true;
+ default:
+ return false;
}
-
- return true;
-#undef LINE_SIZE
}
/**
- * Determine if there was an unterminated heredoc at the end of the input, which
- * would mean the stream isn't finished and we should keep reading.
+ * Determine whether the source parsed by the given parser could become valid if
+ * more input were appended. This is used by tools like IRB to decide whether to
+ * prompt for continuation or to display an error.
+ *
+ * The parser starts with continuable=true. This function scans all errors to
+ * detect two categories of non-continuable errors:
+ *
+ * 1. Fatal errors: errors like invalid number literals or bare global variables
+ * that indicate definitively invalid syntax. These are only considered fatal
+ * if they occur before EOF (at EOF they could be from truncated input, e.g.
+ * `"\x` is an incomplete hex escape).
*
- * For the other lex modes we can check if the lex mode has been closed, but for
- * heredocs when we hit EOF we close the lex mode and then go back to parse the
- * rest of the line after the heredoc declaration so that we get more of the
- * syntax tree.
+ * 2. Stray tokens: unexpected_token_ignore and unexpected_token_close_context
+ * errors indicate tokens that don't belong. A stray token is a cascade
+ * effect (and does not prevent continuability) if:
+ *
+ * a. A non-stray, non-fatal error appeared earlier in the error list at a
+ * strictly earlier source position (the stray was caused by a preceding
+ * parse failure, e.g. a truncated heredoc), OR
+ * b. The stray token is at EOF, starts after position 0 (there is valid
+ * code before it), and either is a single byte (likely a truncated
+ * token like `\`) or there are non-stray errors elsewhere.
+ *
+ * Closing delimiters (`)`, `]`, `}`) at EOF are always genuinely stray —
+ * they are complete tokens and cannot become part of a longer valid
+ * construct by appending more input.
+ *
+ * c. The stray token is `=` at the start of a line, which could be the
+ * beginning of `=begin` (an embedded document). The remaining bytes
+ * after `=` may parse as an identifier, so the error is not at EOF,
+ * but the construct is genuinely incomplete.
*/
-static bool
-pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
+static void
+pm_parse_continuable(pm_parser_t *parser) {
+ // If there are no errors then there is nothing to continue.
+ if (parser->error_list.size == 0) {
+ parser->continuable = false;
+ return;
+ }
- for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
- if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
- return true;
+ if (!parser->continuable) return;
+
+ size_t source_length = (size_t) (parser->end - parser->start);
+
+ // First pass: check if there are any non-stray, non-fatal errors.
+ bool has_non_stray_error = false;
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+ if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
+ has_non_stray_error = true;
+ break;
}
}
- return false;
-}
+ // Second pass: check each error. We track the minimum source position
+ // among non-stray, non-fatal errors seen so far in list order, which
+ // lets us detect cascade stray tokens.
+ size_t non_stray_min_start = SIZE_MAX;
-/**
- * Parse a stream of Ruby source and return the tree.
- *
- * Prism is designed around having the entire source in memory at once, but you
- * can stream stdin in to Ruby so we need to support a streaming API.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t *
-pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) {
- pm_buffer_init(buffer);
+ for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+ size_t error_start = (size_t) error->location.start;
+ size_t error_end = error_start + (size_t) error->location.length;
+ bool at_eof = error_end >= source_length;
- bool eof = pm_parse_stream_read(buffer, stream, stream_fgets);
- pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
- pm_node_t *node = pm_parse(parser);
+ // Fatal errors are non-continuable unless they occur at EOF.
+ if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
+ parser->continuable = false;
+ return;
+ }
- while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
- pm_node_destroy(parser, node);
- eof = pm_parse_stream_read(buffer, stream, stream_fgets);
+ // Track non-stray error positions in list order (fatal ones only at EOF).
+ if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
+ error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
+ if (error_start < non_stray_min_start) non_stray_min_start = error_start;
+ continue;
+ }
- pm_parser_free(parser);
- pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
- node = pm_parse(parser);
+ // This is a stray token. Determine if it is a cascade effect
+ // of a preceding error or genuinely stray.
+
+ // Rule (a): a non-stray error was seen earlier in the list at a
+ // strictly earlier position — this stray is a cascade effect.
+ if (non_stray_min_start < error_start) continue;
+
+ // Rule (b): this stray is at EOF with valid code before it.
+ // Single-byte stray tokens at EOF (like `\` for line continuation)
+ // are likely truncated tokens. Multi-byte stray tokens (like the
+ // keyword `end`) need additional evidence that they are cascade
+ // effects (i.e. non-stray errors exist elsewhere).
+ if (at_eof && error_start > 0) {
+ // Exception: closing delimiters at EOF are genuinely stray.
+ if (error->location.length == 1) {
+ const uint8_t *byte = parser->start + error_start;
+ if (*byte == ')' || *byte == ']' || *byte == '}') {
+ parser->continuable = false;
+ return;
+ }
+
+ // Single-byte non-delimiter stray at EOF: cascade.
+ continue;
+ }
+
+ // Multi-byte stray at EOF: cascade only if there are
+ // non-stray errors (evidence of a preceding parse failure).
+ if (has_non_stray_error) continue;
+ }
+
+ // Rule (c): a stray `=` at the start of a line could be the
+ // beginning of an embedded document (`=begin`). The remaining
+ // bytes after `=` parse as an identifier, so the error is not
+ // at EOF, but the construct is genuinely incomplete.
+ if (error->location.length == 1) {
+ const uint8_t *byte = parser->start + error_start;
+ if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
+ }
+
+ // This stray token is genuinely non-continuable.
+ parser->continuable = false;
+ return;
}
+}
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ */
+pm_node_t *
+pm_parse(pm_parser_t *parser) {
+ pm_node_t *node = parse_program(parser);
+ pm_parse_continuable(parser);
return node;
}
/**
- * Parse the source and return true if it parses without errors or warnings.
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * Prism is designed around having the entire source in memory at once, but you
+ * can stream stdin in to Ruby so we need to support a streaming API.
*/
-PRISM_EXPORTED_FUNCTION bool
-pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
- pm_options_t options = { 0 };
- pm_options_read(&options, data);
+pm_node_t *
+pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) {
+ bool eof = pm_source_stream_read(source);
- pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
+ pm_node_t *node = pm_parse(tmp);
- pm_node_t *node = pm_parse(&parser);
- pm_node_destroy(&parser, node);
+ while (!eof && tmp->error_list.size > 0) {
+ eof = pm_source_stream_read(source);
- bool result = parser.error_list.size == 0;
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_free(tmp);
+ pm_arena_cleanup(arena);
- return result;
+ tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
+ node = pm_parse(tmp);
+ }
+
+ *parser = tmp;
+ return node;
}
#undef PM_CASE_KEYWORD
#undef PM_CASE_OPERATOR
#undef PM_CASE_WRITABLE
#undef PM_STRING_EMPTY
-#undef PM_LOCATION_NODE_BASE_VALUE
-#undef PM_LOCATION_NODE_VALUE
-#undef PM_LOCATION_NULL_VALUE
-#undef PM_LOCATION_TOKEN_VALUE
// We optionally support serializing to a binary string. For systems that don't
// want or need this functionality, it can be turned off with the
// PRISM_EXCLUDE_SERIALIZATION define.
#ifndef PRISM_EXCLUDE_SERIALIZATION
-static inline void
+static PRISM_INLINE void
pm_serialize_header(pm_buffer_t *buffer) {
pm_buffer_append_string(buffer, "PRISM", 5);
pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
@@ -22953,7 +22865,7 @@ pm_serialize_header(pm_buffer_t *buffer) {
/**
* Serialize the AST represented by the given node to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_header(buffer);
pm_serialize_content(parser, node, buffer);
@@ -22964,13 +22876,14 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
* Parse and serialize the AST represented by the given source to the given
* buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
pm_options_read(&options, data);
+ pm_arena_t arena = { 0 };
pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_init(&arena, &parser, source, size, &options);
pm_node_t *node = pm_parse(&parser);
@@ -22978,216 +22891,53 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
pm_serialize_content(&parser, node, buffer);
pm_buffer_append_byte(buffer, '\0');
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
/**
* Parse and serialize the AST represented by the source that is read out of the
* given stream into to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) {
- pm_parser_t parser;
+void
+pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) {
+ pm_arena_t arena = { 0 };
+ pm_parser_t *parser;
pm_options_t options = { 0 };
pm_options_read(&options, data);
- pm_buffer_t parser_buffer;
- pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options);
+ pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options);
pm_serialize_header(buffer);
- pm_serialize_content(&parser, node, buffer);
+ pm_serialize_content(parser, node, buffer);
pm_buffer_append_byte(buffer, '\0');
- pm_node_destroy(&parser, node);
- pm_buffer_free(&parser_buffer);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_free(parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
/**
* Parse and serialize the comments in the given source to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
pm_options_read(&options, data);
+ pm_arena_t arena = { 0 };
pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_init(&arena, &parser, source, size, &options);
- pm_node_t *node = pm_parse(&parser);
+ pm_parse(&parser);
pm_serialize_header(buffer);
pm_serialize_encoding(parser.encoding, buffer);
pm_buffer_append_varsint(buffer, parser.start_line);
- pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
+ pm_serialize_comment_list(&parser.comment_list, buffer);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
#endif
-
-/******************************************************************************/
-/* Slice queries for the Ruby API */
-/******************************************************************************/
-
-/** The category of slice returned from pm_slice_type. */
-typedef enum {
- /** Returned when the given encoding name is invalid. */
- PM_SLICE_TYPE_ERROR = -1,
-
- /** Returned when no other types apply to the slice. */
- PM_SLICE_TYPE_NONE,
-
- /** Returned when the slice is a valid local variable name. */
- PM_SLICE_TYPE_LOCAL,
-
- /** Returned when the slice is a valid constant name. */
- PM_SLICE_TYPE_CONSTANT,
-
- /** Returned when the slice is a valid method name. */
- PM_SLICE_TYPE_METHOD_NAME
-} pm_slice_type_t;
-
-/**
- * Check that the slice is a valid local variable name or constant.
- */
-pm_slice_type_t
-pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
- // first, get the right encoding object
- const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
- if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
-
- // check that there is at least one character
- if (length == 0) return PM_SLICE_TYPE_NONE;
-
- size_t width;
- if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
- // valid because alphabetical
- } else if (*source == '_') {
- // valid because underscore
- width = 1;
- } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
- // valid because multibyte
- } else {
- // invalid because no match
- return PM_SLICE_TYPE_NONE;
- }
-
- // determine the type of the slice based on the first character
- const uint8_t *end = source + length;
- pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
-
- // next, iterate through all of the bytes of the string to ensure that they
- // are all valid identifier characters
- source += width;
-
- while (source < end) {
- if ((width = encoding->alnum_char(source, end - source)) != 0) {
- // valid because alphanumeric
- source += width;
- } else if (*source == '_') {
- // valid because underscore
- source++;
- } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
- // valid because multibyte
- source += width;
- } else {
- // invalid because no match
- break;
- }
- }
-
- // accept a ! or ? at the end of the slice as a method name
- if (*source == '!' || *source == '?' || *source == '=') {
- source++;
- result = PM_SLICE_TYPE_METHOD_NAME;
- }
-
- // valid if we are at the end of the slice
- return source == end ? result : PM_SLICE_TYPE_NONE;
-}
-
-/**
- * Check that the slice is a valid local variable name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
- switch (pm_slice_type(source, length, encoding_name)) {
- case PM_SLICE_TYPE_ERROR:
- return PM_STRING_QUERY_ERROR;
- case PM_SLICE_TYPE_NONE:
- case PM_SLICE_TYPE_CONSTANT:
- case PM_SLICE_TYPE_METHOD_NAME:
- return PM_STRING_QUERY_FALSE;
- case PM_SLICE_TYPE_LOCAL:
- return PM_STRING_QUERY_TRUE;
- }
-
- assert(false && "unreachable");
- return PM_STRING_QUERY_FALSE;
-}
-
-/**
- * Check that the slice is a valid constant name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
- switch (pm_slice_type(source, length, encoding_name)) {
- case PM_SLICE_TYPE_ERROR:
- return PM_STRING_QUERY_ERROR;
- case PM_SLICE_TYPE_NONE:
- case PM_SLICE_TYPE_LOCAL:
- case PM_SLICE_TYPE_METHOD_NAME:
- return PM_STRING_QUERY_FALSE;
- case PM_SLICE_TYPE_CONSTANT:
- return PM_STRING_QUERY_TRUE;
- }
-
- assert(false && "unreachable");
- return PM_STRING_QUERY_FALSE;
-}
-
-/**
- * Check that the slice is a valid method name.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t
-pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
-#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
-#define C1(c) (*source == c)
-#define C2(s) (memcmp(source, s, 2) == 0)
-#define C3(s) (memcmp(source, s, 3) == 0)
-
- switch (pm_slice_type(source, length, encoding_name)) {
- case PM_SLICE_TYPE_ERROR:
- return PM_STRING_QUERY_ERROR;
- case PM_SLICE_TYPE_NONE:
- break;
- case PM_SLICE_TYPE_LOCAL:
- // numbered parameters are not valid method names
- return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
- case PM_SLICE_TYPE_CONSTANT:
- // all constants are valid method names
- case PM_SLICE_TYPE_METHOD_NAME:
- // all method names are valid method names
- return PM_STRING_QUERY_TRUE;
- }
-
- switch (length) {
- case 1:
- return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
- case 2:
- return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
- case 3:
- return B(C3("===") || C3("<=>") || C3("[]="));
- default:
- return PM_STRING_QUERY_FALSE;
- }
-
-#undef B
-#undef C1
-#undef C2
-#undef C3
-}
diff --git a/prism/prism.h b/prism/prism.h
index 317568aa0c..b342bb32c6 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -6,281 +6,25 @@
#ifndef PRISM_H
#define PRISM_H
-#include "prism/defines.h"
-#include "prism/util/pm_buffer.h"
-#include "prism/util/pm_char.h"
-#include "prism/util/pm_integer.h"
-#include "prism/util/pm_memchr.h"
-#include "prism/util/pm_strncasecmp.h"
-#include "prism/util/pm_strpbrk.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "prism/arena.h"
#include "prism/ast.h"
+#include "prism/buffer.h"
#include "prism/diagnostic.h"
+#include "prism/json.h"
#include "prism/node.h"
#include "prism/options.h"
-#include "prism/pack.h"
#include "prism/parser.h"
#include "prism/prettyprint.h"
-#include "prism/regexp.h"
-#include "prism/static_literals.h"
+#include "prism/serialize.h"
+#include "prism/source.h"
+#include "prism/stream.h"
+#include "prism/string_query.h"
#include "prism/version.h"
-#include <assert.h>
-#include <errno.h>
-#include <locale.h>
-#include <math.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#ifndef _WIN32
-#include <strings.h>
-#endif
-
-/**
- * The prism version and the serialization format.
- *
- * @returns The prism version as a constant string.
- */
-PRISM_EXPORTED_FUNCTION const char * pm_version(void);
-
-/**
- * Initialize a parser with the given start and end pointers.
- *
- * @param parser The parser to initialize.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param options The optional options to use when parsing.
- */
-PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);
-
-/**
- * Register a callback that will be called whenever prism changes the encoding
- * it is using to parse based on the magic comment.
- *
- * @param parser The parser to register the callback with.
- * @param callback The callback to register.
- */
-PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
-
-/**
- * Free any memory associated with the given parser.
- *
- * @param parser The parser to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
-
-/**
- * Initiate the parser with the given parser.
- *
- * @param parser The parser to use.
- * @return The AST representing the source.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
-
-/**
- * This function is used in pm_parse_stream to retrieve a line of input from a
- * stream. It closely mirrors that of fgets so that fgets can be used as the
- * default implementation.
- */
-typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
-
-/**
- * Parse a stream of Ruby source and return the tree.
- *
- * @param parser The parser to use.
- * @param buffer The buffer to use.
- * @param stream The stream to parse.
- * @param stream_fgets The function to use to read from the stream.
- * @param options The optional options to use when parsing.
- * @return The AST representing the source.
- */
-PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options);
-
-// We optionally support serializing to a binary string. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_SERIALIZATION define.
-#ifndef PRISM_EXCLUDE_SERIALIZATION
-
-/**
- * Parse and serialize the AST represented by the source that is read out of the
- * given stream into to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param stream The stream to parse.
- * @param stream_fgets The function to use to read from the stream.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data);
-
-/**
- * Serialize the given list of comments to the given buffer.
- *
- * @param parser The parser to serialize.
- * @param list The list of comments to serialize.
- * @param buffer The buffer to serialize to.
- */
-void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
-
-/**
- * Serialize the name of the encoding to the buffer.
- *
- * @param encoding The encoding to serialize.
- * @param buffer The buffer to serialize to.
- */
-void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer);
-
-/**
- * Serialize the encoding, metadata, nodes, and constant pool.
- *
- * @param parser The parser to serialize.
- * @param node The node to serialize.
- * @param buffer The buffer to serialize to.
- */
-void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-
-/**
- * Serialize the AST represented by the given node to the given buffer.
- *
- * @param parser The parser to serialize.
- * @param node The node to serialize.
- * @param buffer The buffer to serialize to.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-
-/**
- * Parse the given source to the AST and dump the AST to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-/**
- * Parse and serialize the comments in the given source to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-/**
- * Lex the given source and serialize to the given buffer.
- *
- * @param source The source to lex.
- * @param size The size of the source.
- * @param buffer The buffer to serialize to.
- * @param data The optional data to pass to the lexer.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-/**
- * Parse and serialize both the AST and the tokens represented by the given
- * source to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- */
-PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);
-
-#endif
-
-/**
- * Parse the source and return true if it parses without errors or warnings.
- *
- * @param source The source to parse.
- * @param size The size of the source.
- * @param data The optional data to pass to the parser.
- * @return True if the source parses without errors or warnings.
- */
-PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data);
-
-/**
- * Returns a string representation of the given token type.
- *
- * @param token_type The token type to convert to a string.
- * @return A string representation of the given token type.
- */
-PRISM_EXPORTED_FUNCTION const char * pm_token_type_name(pm_token_type_t token_type);
-
-/**
- * Returns the human name of the given token type.
- *
- * @param token_type The token type to convert to a human name.
- * @return The human name of the given token type.
- */
-const char * pm_token_type_human(pm_token_type_t token_type);
-
-// We optionally support dumping to JSON. For systems that don't want or need
-// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
-#ifndef PRISM_EXCLUDE_JSON
-
-/**
- * Dump JSON to the given buffer.
- *
- * @param buffer The buffer to serialize to.
- * @param parser The parser that parsed the node.
- * @param node The node to serialize.
- */
-PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
-
-#endif
-
-/**
- * Represents the results of a slice query.
- */
-typedef enum {
- /** Returned if the encoding given to a slice query was invalid. */
- PM_STRING_QUERY_ERROR = -1,
-
- /** Returned if the result of the slice query is false. */
- PM_STRING_QUERY_FALSE,
-
- /** Returned if the result of the slice query is true. */
- PM_STRING_QUERY_TRUE
-} pm_string_query_t;
-
-/**
- * Check that the slice is a valid local variable name.
- *
- * @param source The source to check.
- * @param length The length of the source.
- * @param encoding_name The name of the encoding of the source.
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
- * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name);
-
-/**
- * Check that the slice is a valid constant name.
- *
- * @param source The source to check.
- * @param length The length of the source.
- * @param encoding_name The name of the encoding of the source.
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
- * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name);
-
-/**
- * Check that the slice is a valid method name.
- *
- * @param source The source to check.
- * @param length The length of the source.
- * @param encoding_name The name of the encoding of the source.
- * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
- * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
- */
-PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name);
-
/**
* @mainpage
*
@@ -289,7 +33,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
* dependencies. It is currently being integrated into
* [CRuby](https://github.com/ruby/ruby),
* [JRuby](https://github.com/jruby/jruby),
- * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [TruffleRuby](https://github.com/truffleruby/truffleruby),
* [Sorbet](https://github.com/sorbet/sorbet), and
* [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
*
@@ -303,32 +47,32 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
*
* @section parsing Parsing
*
- * In order to parse Ruby code, the structures and functions that you're going
- * to want to use and be aware of are:
+ * In order to parse Ruby code, the functions that you are going to want to use
+ * and be aware of are:
*
- * * `pm_parser_t` - the main parser structure
- * * `pm_parser_init` - initialize a parser
- * * `pm_parse` - parse and return the root node
- * * `pm_node_destroy` - deallocate the root node returned by `pm_parse`
- * * `pm_parser_free` - free the internal memory of the parser
+ * * `pm_arena_new()` - create a new arena to hold all AST-lifetime allocations
+ * * `pm_parser_new()` - allocate and initialize a new parser
+ * * `pm_parse()` - parse and return the root node
+ * * `pm_parser_free()` - free the parser and its internal memory
+ * * `pm_arena_free()` - free all AST-lifetime memory
*
* Putting all of this together would look something like:
*
* ```c
* void parse(const uint8_t *source, size_t length) {
- * pm_parser_t parser;
- * pm_parser_init(&parser, source, length, NULL);
+ * pm_arena_t *arena = pm_arena_new();
+ * pm_parser_t *parser = pm_parser_new(arena, source, length, NULL);
*
- * pm_node_t *root = pm_parse(&parser);
+ * pm_node_t *root = pm_parse(parser);
* printf("PARSED!\n");
*
- * pm_node_destroy(&parser, root);
- * pm_parser_free(&parser);
+ * pm_parser_free(parser);
+ * pm_arena_free(arena);
* }
* ```
*
- * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
- * their first member. This means you can downcast and upcast any node in the
+ * All of the nodes "inherit" from `pm_node_t` by embedding those structures
+ * as their first member. This means you can downcast and upcast any node in the
* tree to a `pm_node_t`.
*
* @section serializing Serializing
@@ -336,48 +80,51 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint
* Prism provides the ability to serialize the AST and its related metadata into
* a binary format. This format is designed to be portable to different
* languages and runtimes so that you only need to make one FFI call in order to
- * parse Ruby code. The structures and functions that you're going to want to
- * use and be aware of are:
+ * parse Ruby code. The functions that you are going to want to use and be
+ * aware of are:
*
- * * `pm_buffer_t` - a small buffer object that will hold the serialized AST
- * * `pm_buffer_free` - free the memory associated with the buffer
- * * `pm_serialize` - serialize the AST into a buffer
- * * `pm_serialize_parse` - parse and serialize the AST into a buffer
+ * * `pm_buffer_new()` - create a new buffer
+ * * `pm_buffer_free()` - free the buffer and its internal memory
+ * * `pm_serialize_parse()` - parse and serialize the AST into a buffer
*
* Putting all of this together would look something like:
*
* ```c
* void serialize(const uint8_t *source, size_t length) {
- * pm_buffer_t buffer = { 0 };
+ * pm_buffer_t *buffer = pm_buffer_new();
*
- * pm_serialize_parse(&buffer, source, length, NULL);
+ * pm_serialize_parse(buffer, source, length, NULL);
* printf("SERIALIZED!\n");
*
- * pm_buffer_free(&buffer);
+ * pm_buffer_free(buffer);
* }
* ```
*
* @section inspecting Inspecting
*
* Prism provides the ability to inspect the AST by pretty-printing nodes. You
- * can do this with the `pm_prettyprint` function, which you would use like:
+ * can do this with the `pm_prettyprint()` function, which you would use like:
*
* ```c
* void prettyprint(const uint8_t *source, size_t length) {
- * pm_parser_t parser;
- * pm_parser_init(&parser, source, length, NULL);
+ * pm_arena_t *arena = pm_arena_new();
+ * pm_parser_t *parser = pm_parser_new(arena, source, length, NULL);
*
- * pm_node_t *root = pm_parse(&parser);
- * pm_buffer_t buffer = { 0 };
+ * pm_node_t *root = pm_parse(parser);
+ * pm_buffer_t *buffer = pm_buffer_new();
*
- * pm_prettyprint(&buffer, &parser, root);
- * printf("%*.s\n", (int) buffer.length, buffer.value);
+ * pm_prettyprint(buffer, parser, root);
+ * printf("%.*s\n", (int) pm_buffer_length(buffer), pm_buffer_value(buffer));
*
- * pm_buffer_free(&buffer);
- * pm_node_destroy(&parser, root);
- * pm_parser_free(&parser);
+ * pm_buffer_free(buffer);
+ * pm_parser_free(parser);
+ * pm_arena_free(arena);
* }
* ```
*/
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/prism/regexp.c b/prism/regexp.c
index dcc7476244..cc17aa4d09 100644
--- a/prism/regexp.c
+++ b/prism/regexp.c
@@ -1,5 +1,20 @@
-#include "prism/regexp.h"
-
+#include "prism/internal/regexp.h"
+
+#include "prism/compiler/inline.h"
+#include "prism/compiler/fallthrough.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/char.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/memchr.h"
+#include "prism/internal/parser.h"
+#include "prism/internal/stringy.h"
+#include "prism/internal/strncasecmp.h"
+
+#include <assert.h>
+#include <string.h>
+
+/** The maximum depth of nested groups allowed in a regular expression. */
#define PM_REGEXP_PARSE_DEPTH_MAX 4096
/**
@@ -18,6 +33,54 @@ typedef struct {
/** A pointer to the end of the source that we are parsing. */
const uint8_t *end;
+ /** The encoding of the source. */
+ const pm_encoding_t *encoding;
+
+ /** The callback to call when a named capture group is found. */
+ pm_regexp_name_callback_t name_callback;
+
+ /** The data to pass to the name callback. */
+ pm_regexp_name_data_t *name_data;
+
+ /** The start of the regexp node (for error locations). */
+ const uint8_t *node_start;
+
+ /** The end of the regexp node (for error locations). */
+ const uint8_t *node_end;
+
+ /**
+ * The explicit encoding determined by escape sequences. NULL if no
+ * encoding-setting escape has been seen, UTF-8 for `\u` escapes, or the
+ * source encoding for `\x` escapes.
+ */
+ const pm_encoding_t *explicit_encoding;
+
+ /**
+ * Pointer to the first non-POSIX property name (for /n error messages).
+ * POSIX properties (Alnum, Alpha, etc.) work in all encodings.
+ * Script properties (Hiragana, Katakana, etc.) work in /e, /s, /u.
+ * Unicode-only properties (L, Ll, etc.) work only in /u.
+ */
+ const uint8_t *property_name;
+
+ /** Length of the first non-POSIX property name found. */
+ size_t property_name_length;
+
+ /**
+ * Pointer to the first Unicode-only property name (for /e, /s error
+ * messages). NULL if only POSIX or script properties have been seen.
+ */
+ const uint8_t *unicode_property_name;
+
+ /** Length of the first Unicode-only property name found. */
+ size_t unicode_property_name_length;
+
+ /** Buffer of hex escape byte values >= 0x80, separated by 0x00 sentinels. */
+ pm_buffer_t hex_escape_buffer;
+
+ /** Count of non-ASCII literal bytes (not from escapes). */
+ uint32_t non_ascii_literal_count;
+
/**
* Whether or not the regular expression currently being parsed is in
* extended mode, wherein whitespace is ignored and comments are allowed.
@@ -27,31 +90,77 @@ typedef struct {
/** Whether the encoding has changed from the default. */
bool encoding_changed;
- /** The encoding of the source. */
- const pm_encoding_t *encoding;
+ /** Whether the source content is shared (for named capture callback). */
+ bool shared;
- /** The callback to call when a named capture group is found. */
- pm_regexp_name_callback_t name_callback;
+ /** Whether a `\uNNNN` or `\u{...}` escape with value >= 0x80 was seen. */
+ bool has_unicode_escape;
- /** The data to pass to the name callback. */
- void *name_data;
+ /** Whether a `\xNN` escape (or `\M-x`, etc.) with value >= 0x80 was seen. */
+ bool has_hex_escape;
+
+ /**
+ * Tracks whether the last encoding-setting escape was `\u` (true) or `\x`
+ * (false). This matters for error messages when both types are mixed.
+ */
+ bool last_escape_was_unicode;
+
+ /** Whether any `\p{...}` or `\P{...}` property escape was found. */
+ bool has_property_escape;
+
+ /** Whether a Unicode-only property escape was found (not POSIX or script). */
+ bool has_unicode_property_escape;
- /** The callback to call when a parse error is found. */
- pm_regexp_error_callback_t error_callback;
+ /** Whether a `\u` escape with invalid range (surrogate or > 0x10FFFF) was seen. */
+ bool invalid_unicode_range;
- /** The data to pass to the error callback. */
- void *error_data;
+ /** Whether we are accumulating consecutive hex escape bytes. */
+ bool hex_group_active;
+
+ /** Whether an invalid multibyte character was found during parsing. */
+ bool has_invalid_multibyte;
} pm_regexp_parser_t;
/**
- * Append an error to the parser.
+ * Append a syntax error to the parser's error list. If the source is shared
+ * (points into the original source), we can point to the exact error location.
+ * Otherwise, we point to the whole regexp node.
+ *
+ * In both cases the location is recorded as a 32-bit offset from the start of
+ * the parser's source plus a 32-bit length, and the diagnostic is appended
+ * with the PM_ERR_REGEXP_PARSE_ERROR id and `message` as its only argument.
+ * `start` and `end` are ignored when the source is not shared.
*/
-static inline void
+static PRISM_INLINE void
pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
- parser->error_callback(start, end, message, parser->error_data);
+ pm_parser_t *pm = parser->parser;
+ uint32_t loc_start, loc_length;
+
+ if (parser->shared) {
+ loc_start = (uint32_t) (start - pm->start);
+ loc_length = (uint32_t) (end - start);
+ } else {
+ loc_start = (uint32_t) (parser->node_start - pm->start);
+ loc_length = (uint32_t) (parser->node_end - parser->node_start);
+ }
+
+ pm_diagnostic_list_append_format(&pm->metadata_arena, &pm->error_list, loc_start, loc_length, PM_ERR_REGEXP_PARSE_ERROR, message);
}
/**
+ * Append a formatted diagnostic error with proper shared/non-shared location
+ * handling. This is a macro because we need variadic args for the format string.
+ *
+ * The location is the [err_start_, err_end_) range when the source is shared
+ * and the whole regexp node otherwise; either way it is stored as a 32-bit
+ * offset from the start of the parser's source plus a 32-bit length.
+ *
+ * Note that `parser_` and `err_start_` are expanded more than once, so
+ * arguments with side effects should be avoided, and that at least one
+ * variadic argument has to follow `diag_id` because `__VA_ARGS__` is pasted
+ * after a comma.
+ */
+#define pm_regexp_parse_error_format(parser_, err_start_, err_end_, diag_id, ...) \
+ do { \
+ pm_parser_t *pm__ = (parser_)->parser; \
+ uint32_t loc_start__, loc_length__; \
+ if ((parser_)->shared) { \
+ loc_start__ = (uint32_t) ((err_start_) - pm__->start); \
+ loc_length__ = (uint32_t) ((err_end_) - (err_start_)); \
+ } else { \
+ loc_start__ = (uint32_t) ((parser_)->node_start - pm__->start); \
+ loc_length__ = (uint32_t) ((parser_)->node_end - (parser_)->node_start); \
+ } \
+ pm_diagnostic_list_append_format(&pm__->metadata_arena, &pm__->error_list, loc_start__, loc_length__, diag_id, __VA_ARGS__); \
+ } while (0)
+
+/**
* This appends a new string to the list of named captures. This function
* assumes the caller has already checked the validity of the name callback.
*/
@@ -59,14 +168,14 @@ static void
pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
pm_string_t string;
pm_string_shared_init(&string, start, end);
- parser->name_callback(&string, parser->name_data);
- pm_string_free(&string);
+ parser->name_callback(parser->parser, &string, parser->shared, parser->name_data);
+ pm_string_cleanup(&string);
}
/**
* Returns true if the cursor has reached (or moved past) the end of the
* source, in which case there is no next character left to read.
*/
-static inline bool
+static PRISM_INLINE bool
pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
return parser->cursor >= parser->end;
}
@@ -74,7 +183,7 @@ pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
/**
* Optionally accept a char and consume it if it exists.
*/
-static inline bool
+static PRISM_INLINE bool
pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
parser->cursor++;
@@ -86,7 +195,7 @@ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
/**
* Expect a character to be present and consume it.
*/
-static inline bool
+static PRISM_INLINE bool
pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
parser->cursor++;
@@ -114,6 +223,47 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
}
/**
+ * Close the group of hex escape bytes that is currently being accumulated, if
+ * there is one. Consecutive hex escape bytes >= 0x80 form a group; when
+ * something other than such a byte follows, a 0x00 sentinel is appended to
+ * the hex escape buffer so that the groups can be validated as multibyte
+ * characters independently of each other later on. Calling this while no
+ * group is active does nothing.
+ */
+static PRISM_INLINE void
+pm_regexp_hex_group_boundary(pm_regexp_parser_t *parser) {
+    // No bytes are being accumulated, so there is no group to terminate.
+    if (!parser->hex_group_active) return;
+
+    parser->hex_group_active = false;
+    pm_buffer_append_byte(&parser->hex_escape_buffer, 0x00);
+}
+
+/**
+ * Record a byte produced by a hex-style escape for multibyte validation.
+ *
+ * A byte below 0x80 is not stored; it only terminates the group of high bytes
+ * that may precede it. A byte >= 0x80 is appended to the hex escape buffer,
+ * (re)opens the current group, and marks the explicit encoding as the source
+ * encoding with a hex escape as the most recent encoding-setting escape.
+ */
+static PRISM_INLINE void
+pm_regexp_track_hex_escape(pm_regexp_parser_t *parser, uint8_t byte) {
+    if (byte < 0x80) {
+        // An ASCII byte ends whatever run of high bytes came before it.
+        pm_regexp_hex_group_boundary(parser);
+        return;
+    }
+
+    pm_buffer_append_byte(&parser->hex_escape_buffer, byte);
+    parser->hex_group_active = true;
+    parser->has_hex_escape = true;
+
+    // A high byte ties the regexp to the source encoding and supersedes any
+    // earlier `\u` escape for the purpose of picking error messages.
+    parser->explicit_encoding = parser->encoding;
+    parser->last_escape_was_unicode = false;
+}
+
+/**
+ * Convert a single hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F')
+ * to its numeric value in the range 0-15. Returns -1 for any other byte.
+ */
+static PRISM_INLINE int
+pm_regexp_hex_digit_value(uint8_t byte) {
+    if (byte >= '0' && byte <= '9') {
+        return byte - '0';
+    }
+
+    // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves 'a'..'f'
+    // untouched, so a single range check covers both letter cases.
+    const int folded = byte | 0x20;
+    if (folded >= 'a' && folded <= 'f') {
+        return folded - 'a' + 10;
+    }
+
+    return -1;
+}
+
+/**
* Range quantifiers are a special class of quantifiers that look like
*
* * {digit}
@@ -121,13 +271,12 @@ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
* * {digit,digit}
* * {,digit}
*
- * Unfortunately, if there are any spaces in between, then this just becomes a
- * regular character match expression and we have to backtrack. So when this
- * function first starts running, we'll create a "save" point and then attempt
- * to parse the quantifier. If it fails, we'll restore the save point and
- * return.
+ * If there are any spaces in between, then this just becomes a regular
+ * character match expression and we have to backtrack. So when this function
+ * first starts running, we'll create a "save" point and then attempt to parse
+ * the quantifier. If it fails, we'll restore the save point and return.
*
- * The properly track everything, we're going to build a little state machine.
+ * To properly track everything, we're going to build a little state machine.
* It looks something like the following:
*
* +-------+ +---------+ ------------+
@@ -275,11 +424,393 @@ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
);
}
+/**
+ * Property escape classification. Onigmo supports three tiers of property
+ * names depending on the encoding:
+ *
+ * - POSIX properties (Alnum, Alpha, ASCII, Blank, Cntrl, Digit, Graph, Lower,
+ * Print, Punct, Space, Upper, XDigit, Word): valid in all encodings.
+ * - Script properties (Hiragana, Katakana, Han, Latin, Greek, Cyrillic): valid
+ * in EUC-JP (/e), Windows-31J (/s), and UTF-8 (/u), but not ASCII-8BIT (/n).
+ * - Unicode-only properties (general categories like L, Ll, Lu, etc., plus
+ * Any, Assigned): valid only in UTF-8 (/u).
+ *
+ * The enumerators are declared from the least to the most restrictive tier.
+ * Keep them in this order: pm_regexp_parse_property_escape compares a
+ * classification against PM_REGEXP_PROPERTY_SCRIPT with `>=`.
+ */
+typedef enum {
+ PM_REGEXP_PROPERTY_POSIX, /* valid with every encoding modifier */
+ PM_REGEXP_PROPERTY_SCRIPT, /* valid with /e, /s and /u */
+ PM_REGEXP_PROPERTY_UNICODE /* valid with /u only */
+} pm_regexp_property_type_t;
+
+/**
+ * Classify a property name. The name may start with '^' for negation, which
+ * is skipped before matching.
+ *
+ * `name` points at the first byte inside the braces of `\p{...}` and `length`
+ * is the number of bytes up to (not including) the closing brace. The name is
+ * compared against the known POSIX and script property names with
+ * pm_strncasecmp (presumably a case-insensitive comparison); the candidates
+ * are first narrowed down by length so that every comparison covers the whole
+ * name. Any name that is not recognized is reported as
+ * PM_REGEXP_PROPERTY_UNICODE, including an empty one.
+ */
+static pm_regexp_property_type_t
+pm_regexp_classify_property(const uint8_t *name, size_t length) {
+ // Skip leading '^' for negated properties like \p{^Hiragana}.
+ if (length > 0 && name[0] == '^') {
+ name++;
+ length--;
+ }
+
+#define PM_REGEXP_CASECMP(str_) (pm_strncasecmp(name, (const uint8_t *) (str_), length) == 0)
+
+ switch (length) { // every literal below is exactly `length` bytes long
+ case 3:
+ if (PM_REGEXP_CASECMP("Han")) return PM_REGEXP_PROPERTY_SCRIPT;
+ break;
+ case 4:
+ if (PM_REGEXP_CASECMP("Word")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 5:
+ /* Most properties are length 5, so dispatch on first character. */
+ switch (name[0] | 0x20) { // | 0x20 folds an ASCII uppercase letter to lowercase
+ case 'a':
+ if (PM_REGEXP_CASECMP("Alnum")) return PM_REGEXP_PROPERTY_POSIX;
+ if (PM_REGEXP_CASECMP("Alpha")) return PM_REGEXP_PROPERTY_POSIX;
+ if (PM_REGEXP_CASECMP("ASCII")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 'b':
+ if (PM_REGEXP_CASECMP("Blank")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 'c':
+ if (PM_REGEXP_CASECMP("Cntrl")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 'd':
+ if (PM_REGEXP_CASECMP("Digit")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 'g':
+ if (PM_REGEXP_CASECMP("Graph")) return PM_REGEXP_PROPERTY_POSIX;
+ if (PM_REGEXP_CASECMP("Greek")) return PM_REGEXP_PROPERTY_SCRIPT;
+ break;
+ case 'l':
+ if (PM_REGEXP_CASECMP("Lower")) return PM_REGEXP_PROPERTY_POSIX;
+ if (PM_REGEXP_CASECMP("Latin")) return PM_REGEXP_PROPERTY_SCRIPT;
+ break;
+ case 'p':
+ if (PM_REGEXP_CASECMP("Print")) return PM_REGEXP_PROPERTY_POSIX;
+ if (PM_REGEXP_CASECMP("Punct")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 's':
+ if (PM_REGEXP_CASECMP("Space")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 'u':
+ if (PM_REGEXP_CASECMP("Upper")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ }
+ break;
+ case 6:
+ if (PM_REGEXP_CASECMP("XDigit")) return PM_REGEXP_PROPERTY_POSIX;
+ break;
+ case 8:
+ if (PM_REGEXP_CASECMP("Hiragana")) return PM_REGEXP_PROPERTY_SCRIPT;
+ if (PM_REGEXP_CASECMP("Katakana")) return PM_REGEXP_PROPERTY_SCRIPT;
+ if (PM_REGEXP_CASECMP("Cyrillic")) return PM_REGEXP_PROPERTY_SCRIPT;
+ break;
+ }
+
+#undef PM_REGEXP_CASECMP
+
+ // Everything else is Unicode-only (general categories, other scripts, etc.).
+ return PM_REGEXP_PROPERTY_UNICODE;
+}
+
+/**
+ * Check for and skip a `\p{...}` or `\P{...}` property escape. The cursor
+ * must be pointing at the 'p' or 'P' when this is called.
+ *
+ * When the letter is followed by '{' and a matching '}' exists before the end
+ * of the source, the escape is recorded on the regexp parser, the cursor is
+ * moved past the closing '}', and true is returned. The property name is
+ * classified into one of three tiers (POSIX, script, Unicode-only), and the
+ * first non-POSIX name and the first Unicode-only name are remembered so that
+ * they can be used in error messages for incompatible encoding modifiers.
+ *
+ * Otherwise only the single 'p'/'P' character is skipped and false is
+ * returned.
+ */
+static bool
+pm_regexp_parse_property_escape(pm_regexp_parser_t *parser) {
+    assert(*parser->cursor == 'p' || *parser->cursor == 'P');
+
+    // Locate the braced name, if there is one. `closing` stays NULL unless
+    // both the opening and the closing brace are present.
+    const uint8_t *name = NULL;
+    const uint8_t *closing = NULL;
+
+    if (parser->cursor + 1 < parser->end && parser->cursor[1] == '{') {
+        name = parser->cursor + 2;
+
+        for (closing = name; closing < parser->end; closing++) {
+            if (*closing == '}') break;
+        }
+
+        if (closing == parser->end) closing = NULL;
+    }
+
+    if (closing == NULL) {
+        // Not a property escape, just skip the single character after '\'.
+        parser->cursor++;
+        return false;
+    }
+
+    const size_t length = (size_t) (closing - name);
+    const pm_regexp_property_type_t tier = pm_regexp_classify_property(name, length);
+
+    parser->has_property_escape = true;
+
+    // Remember the first name that is not a POSIX property; it is the one
+    // reported when the regexp uses the /n modifier.
+    if (tier != PM_REGEXP_PROPERTY_POSIX && parser->property_name == NULL) {
+        parser->property_name = name;
+        parser->property_name_length = length;
+    }
+
+    // Remember the first Unicode-only name; it is the one reported when the
+    // regexp uses the /e or /s modifier.
+    if (tier == PM_REGEXP_PROPERTY_UNICODE) {
+        parser->has_unicode_property_escape = true;
+
+        if (parser->unicode_property_name == NULL) {
+            parser->unicode_property_name = name;
+            parser->unicode_property_name_length = length;
+        }
+    }
+
+    parser->cursor = closing + 1; // skip past '}'
+    return true;
+}
+
+/**
+ * Validate and skip a \u escape sequence in a regular expression. The cursor
+ * should be pointing at the character after 'u' when this is called (the
+ * backslash is therefore two bytes behind the cursor). This handles both the
+ * \u{NNNN MMMM} and \uNNNN forms. Also tracks encoding state for validation:
+ *
+ * - A codepoint >= 0x80 marks the regexp as containing a Unicode escape, sets
+ *   the explicit encoding to UTF-8, and closes the current hex escape group.
+ * - A surrogate or a codepoint above 0x10FFFF sets `invalid_unicode_range`.
+ *
+ * Malformed escapes (nothing after `\u`, fewer than four digits in the short
+ * form, an empty or non-hex codepoint list, more than six digits for one
+ * codepoint, or a missing closing brace) are reported as errors immediately.
+ */
+static void
+pm_regexp_parse_unicode_escape(pm_regexp_parser_t *parser) {
+    const uint8_t *escape_start = parser->cursor - 2; // points to '\'
+
+    if (pm_regexp_char_is_eof(parser)) {
+        pm_regexp_parse_error(parser, escape_start, parser->cursor, "invalid Unicode escape");
+        return;
+    }
+
+    if (*parser->cursor == '{') {
+        parser->cursor++; // skip '{'
+
+        // Skip leading whitespace.
+        while (!pm_regexp_char_is_eof(parser) && pm_char_is_whitespace(*parser->cursor)) {
+            parser->cursor++;
+        }
+
+        bool has_codepoint = false;
+
+        while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '}') {
+            // Parse the hex digits to compute the codepoint value.
+            uint32_t value = 0;
+            size_t hex_count = 0;
+
+            int digit;
+            while (!pm_regexp_char_is_eof(parser) && (digit = pm_regexp_hex_digit_value(*parser->cursor)) >= 0) {
+                // Stop accumulating once the value has left the Unicode range.
+                // The number of digits is unbounded here, so shifting
+                // unconditionally would wrap the 32-bit accumulator after
+                // eight digits and an out-of-range codepoint could then be
+                // mistaken for a small, valid one by the checks below.
+                if (value <= 0x10FFFF) {
+                    value = (value << 4) | (uint32_t) digit;
+                }
+
+                hex_count++;
+                parser->cursor++;
+            }
+
+            if (hex_count == 0) {
+                // Skip to '}' or end of regexp to find the full extent.
+                while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '}') {
+                    parser->cursor++;
+                }
+
+                const uint8_t *escape_end = parser->cursor;
+                if (!pm_regexp_char_is_eof(parser)) {
+                    escape_end++;
+                    parser->cursor++; // skip '}'
+                }
+
+                pm_regexp_parse_error_format(parser, escape_start, escape_end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (escape_end - escape_start), (const char *) escape_start);
+                return;
+            }
+
+            if (hex_count > 6) {
+                pm_regexp_parse_error(parser, escape_start, parser->cursor, "invalid Unicode range");
+            }
+
+            // Track encoding state for this codepoint.
+            if (value >= 0x80) {
+                parser->has_unicode_escape = true;
+                parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
+                parser->last_escape_was_unicode = true;
+                pm_regexp_hex_group_boundary(parser);
+            }
+
+            // Check for invalid Unicode range (surrogates or > 0x10FFFF).
+            if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
+                parser->invalid_unicode_range = true;
+            }
+
+            has_codepoint = true;
+
+            // Skip whitespace between codepoints.
+            while (!pm_regexp_char_is_eof(parser) && pm_char_is_whitespace(*parser->cursor)) {
+                parser->cursor++;
+            }
+        }
+
+        if (pm_regexp_char_is_eof(parser)) {
+            pm_regexp_parse_error(parser, escape_start, parser->cursor, "unterminated Unicode escape");
+        } else {
+            if (!has_codepoint) {
+                pm_regexp_parse_error_format(parser, escape_start, parser->cursor + 1, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->cursor + 1 - escape_start), (const char *) escape_start);
+            }
+            parser->cursor++; // skip '}'
+        }
+    } else {
+        // \uNNNN form: need exactly 4 hex digits. At most four digits are
+        // consumed, so the accumulator cannot overflow here.
+        uint32_t value = 0;
+        size_t hex_count = 0;
+
+        int digit;
+        while (hex_count < 4 && !pm_regexp_char_is_eof(parser) && (digit = pm_regexp_hex_digit_value(*parser->cursor)) >= 0) {
+            value = (value << 4) | (uint32_t) digit;
+            hex_count++;
+            parser->cursor++;
+        }
+
+        if (hex_count < 4) {
+            pm_regexp_parse_error(parser, escape_start, parser->cursor, "invalid Unicode escape");
+        } else if (value >= 0x80) {
+            parser->has_unicode_escape = true;
+            parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
+            parser->last_escape_was_unicode = true;
+            pm_regexp_hex_group_boundary(parser);
+        }
+
+        // Check for invalid Unicode range.
+        if (hex_count == 4 && (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))) {
+            parser->invalid_unicode_range = true;
+        }
+    }
+}
+
// Forward declaration because character sets can be nested.
static bool
pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
/**
+ * Parse a \x escape and return the byte value. The cursor should be pointing
+ * at the character after 'x'. One or two hex digits are consumed; when no hex
+ * digit follows, nothing is consumed and -1 is returned. A successfully parsed
+ * byte is also handed to pm_regexp_track_hex_escape.
+ */
+static int
+pm_regexp_parse_hex_escape(pm_regexp_parser_t *parser) {
+    if (pm_regexp_char_is_eof(parser)) return -1;
+
+    const int high = pm_regexp_hex_digit_value(*parser->cursor);
+    if (high < 0) return -1;
+    parser->cursor++;
+
+    // A second digit is optional; without it the first digit is the value.
+    int byte = high;
+    if (!pm_regexp_char_is_eof(parser)) {
+        const int low = pm_regexp_hex_digit_value(*parser->cursor);
+
+        if (low >= 0) {
+            byte = (high << 4) | low;
+            parser->cursor++;
+        }
+    }
+
+    pm_regexp_track_hex_escape(parser, (uint8_t) byte);
+    return byte;
+}
+
+/**
+ * Parse a backslash escape sequence in a regexp, handling \u (unicode),
+ * \p/\P (property), \x (hex), and other single-character escapes. Also
+ * tracks encoding state for \M-x and \C-\M-x escapes.
+ *
+ * The cursor is expected to point at the character right after the backslash;
+ * if that is already the end of the source nothing happens. On return the
+ * cursor has been advanced past the escape.
+ *
+ * - `\M-` is only treated as a meta escape when at least one more byte follows
+ *   the dash; otherwise just the 'M' is skipped. The escaped character (or a
+ *   nested backslash escape) is consumed and a placeholder byte 0x80 is
+ *   recorded as a hex escape, not the actual byte value of the character.
+ * - `\C-` and `\c` consume the following character or nested backslash escape
+ *   and record nothing themselves.
+ * - Any other character closes the current hex escape group and is skipped as
+ *   a single byte.
+ *
+ * NOTE(review): the `\p`, `\C-` and `\c` cases do not close the current hex
+ * escape group themselves, unlike the default case - confirm this is intended.
+ * NOTE(review): nested escapes are handled by recursion with no visible depth
+ * limit (e.g. a long run of `\c\c\c...`) - confirm the input is bounded.
+ */
+static void
+pm_regexp_parse_backslash_escape(pm_regexp_parser_t *parser) {
+ if (pm_regexp_char_is_eof(parser)) return;
+
+ switch (*parser->cursor) {
+ case 'u':
+ parser->cursor++; // skip 'u'
+ pm_regexp_parse_unicode_escape(parser);
+ break;
+ case 'p':
+ case 'P':
+ pm_regexp_parse_property_escape(parser);
+ break;
+ case 'x':
+ parser->cursor++; // skip 'x'
+ pm_regexp_parse_hex_escape(parser);
+ break;
+ case 'M':
+ // \M-x produces (x | 0x80), always >= 0x80
+ if (parser->cursor + 2 < parser->end && parser->cursor[1] == '-') {
+ parser->cursor += 2; // skip 'M-'
+ if (!pm_regexp_char_is_eof(parser)) {
+ if (*parser->cursor == '\\') {
+ parser->cursor++;
+ // \M-\C-x or \M-\cx — the resulting byte is always >= 0x80
+ // We just need to track it as a hex escape >= 0x80.
+ pm_regexp_parse_backslash_escape(parser);
+ } else {
+ parser->cursor++;
+ }
+ // \M-x always produces a byte >= 0x80
+ pm_regexp_track_hex_escape(parser, 0x80);
+ }
+ } else {
+ parser->cursor++;
+ }
+ break;
+ case 'C':
+ // \C-x produces (x & 0x1F)
+ if (parser->cursor + 2 < parser->end && parser->cursor[1] == '-') {
+ parser->cursor += 2; // skip 'C-'
+ if (!pm_regexp_char_is_eof(parser)) {
+ if (*parser->cursor == '\\') {
+ parser->cursor++;
+ pm_regexp_parse_backslash_escape(parser);
+ } else {
+ parser->cursor++;
+ }
+ }
+ } else {
+ parser->cursor++;
+ }
+ break;
+ case 'c':
+ // \cx produces (x & 0x1F)
+ parser->cursor++; // skip 'c'
+ if (!pm_regexp_char_is_eof(parser)) {
+ if (*parser->cursor == '\\') {
+ parser->cursor++;
+ pm_regexp_parse_backslash_escape(parser);
+ } else {
+ parser->cursor++;
+ }
+ }
+ break;
+ default:
+ pm_regexp_hex_group_boundary(parser);
+ parser->cursor++;
+ break;
+ }
+}
+
+/**
+ * Inspect a literal byte that has already been consumed. `cursor` points at
+ * that byte, one behind the parser's own cursor at the call site in this file.
+ *
+ * Nothing happens unless the byte is >= 0x80, the encoding has been changed
+ * from the default, and the encoding is a multibyte one. In that case the
+ * width of the character starting at `cursor` is computed:
+ *
+ * - wider than one byte: the parser's cursor is advanced over the remaining
+ *   bytes of the character;
+ * - zero: the byte does not start a valid character, so the parser is flagged
+ *   and an error is emitted at the byte location immediately.
+ */
+static void
+pm_regexp_parse_invalid_multibyte(pm_regexp_parser_t *parser, const uint8_t *cursor) {
+    if (*cursor < 0x80) return;
+    if (!parser->encoding_changed || !parser->encoding->multibyte) return;
+
+    const size_t width = parser->encoding->char_width(cursor, (ptrdiff_t) (parser->end - cursor));
+
+    if (width == 0) {
+        parser->has_invalid_multibyte = true;
+        pm_regexp_parse_error_format(parser, cursor, cursor + 1, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+    } else if (width > 1) {
+        // The first byte was consumed by the caller; skip the rest.
+        parser->cursor += width - 1;
+    }
+}
+
+/**
* match-char-set : '[' '^'? (match-range | match-char)* ']'
* ;
*/
@@ -293,12 +824,16 @@ pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
break;
case '\\':
- if (!pm_regexp_char_is_eof(parser)) {
- parser->cursor++;
- }
+ pm_regexp_parse_backslash_escape(parser);
break;
default:
- // do nothing, we've already advanced the cursor
+ // We've already advanced the cursor by one byte. If the byte
+ // was >= 0x80 in a multibyte encoding, we may need to consume
+ // additional continuation bytes and validate the character.
+ if (*(parser->cursor - 1) >= 0x80) {
+ parser->non_ascii_literal_count++;
+ }
+ pm_regexp_parse_invalid_multibyte(parser, parser->cursor - 1);
break;
}
}
@@ -354,8 +889,13 @@ typedef enum {
// These are the options that are configurable on the regular expression (or
// from within a group).
+/** The minimum character value for a regexp option slot (the first slot). */
#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
+
+/** The maximum character value for a regexp option slot (the last slot). */
#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
+
+/** The number of regexp option slots: one per character from the minimum to the maximum, inclusive. */
#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
/**
@@ -498,7 +1038,15 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
}
size_t width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
- if (width == 0) return false;
+ if (width == 0) {
+ if (*parser->cursor >= 0x80) {
+ parser->has_invalid_multibyte = true;
+ pm_regexp_parse_error_format(parser, parser->cursor, parser->cursor + 1, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+ parser->cursor++;
+ continue;
+ }
+ return false;
+ }
escaped = (width == 1) && (*parser->cursor == '\\');
parser->cursor += width;
@@ -686,9 +1234,7 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
return pm_regexp_parse_quantifier(parser);
case '\\':
parser->cursor++;
- if (!pm_regexp_char_is_eof(parser)) {
- parser->cursor++;
- }
+ pm_regexp_parse_backslash_escape(parser);
return pm_regexp_parse_quantifier(parser);
case '(':
parser->cursor++;
@@ -720,9 +1266,30 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
}
- if (width == 0) return false; // TODO: add appropriate error
- parser->cursor += width;
+ if (width == 0) {
+ if (*parser->cursor >= 0x80 && parser->encoding_changed) {
+ if (parser->encoding->multibyte) {
+ // Invalid multibyte character in a multibyte encoding.
+ // Emit the error at the byte location immediately.
+ parser->has_invalid_multibyte = true;
+ pm_regexp_parse_error_format(parser, parser->cursor, parser->cursor + 1, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+ } else {
+ // Non-ASCII byte in a single-byte encoding (e.g.,
+ // US-ASCII). Count it for later error reporting.
+ parser->non_ascii_literal_count++;
+ }
+ parser->cursor++;
+ return pm_regexp_parse_quantifier(parser);
+ }
+ return false;
+ }
+
+ // Count non-ASCII literal bytes.
+ for (size_t i = 0; i < width; i++) {
+ if (parser->cursor[i] >= 0x80) parser->non_ascii_literal_count++;
+ }
+ parser->cursor += width;
return pm_regexp_parse_quantifier(parser);
}
}
@@ -768,13 +1335,354 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
return pm_regexp_char_is_eof(parser);
}
+// ---------------------------------------------------------------------------
+// Encoding validation
+// ---------------------------------------------------------------------------
+
/**
- * Parse a regular expression and extract the names of all of the named capture
- * groups.
+ * Check that every group of hex escape bytes stored in the buffer decodes as
+ * a sequence of complete characters in the given encoding. Groups are runs of
+ * bytes separated by 0x00 sentinels. Returns false as soon as the encoding
+ * reports a width of zero for some position inside a group, and true when all
+ * groups (or an empty buffer) have been walked successfully.
+ */
+static bool
+pm_regexp_validate_hex_escapes(const pm_encoding_t *encoding, const pm_buffer_t *buffer) {
+    const uint8_t *bytes = (const uint8_t *) pm_buffer_value(buffer);
+    const size_t length = pm_buffer_length(buffer);
+
+    size_t offset = 0;
+    while (offset < length) {
+        // Find the end of the group that starts at `offset`.
+        size_t group_end = offset;
+        while (group_end < length && bytes[group_end] != 0x00) {
+            group_end++;
+        }
+
+        // Decode the group one character at a time.
+        size_t position = offset;
+        while (position < group_end) {
+            const size_t width = encoding->char_width(bytes + position, (ptrdiff_t) (group_end - position));
+            if (width == 0) return false;
+            position += width;
+        }
+
+        // Continue after the sentinel, if the group was terminated by one.
+        offset = (group_end < length) ? group_end + 1 : group_end;
+    }
+
+    return true;
+}
+
+/**
+ * Append regexp source content to `buffer` in a form suitable for error
+ * messages. UTF-8 content is appended unchanged. For every other encoding,
+ * ASCII bytes are copied as-is and non-ASCII bytes are hex-escaped: a
+ * character that the (multibyte) encoding reports as wider than one byte is
+ * written as `\x{...}` containing all of its bytes, and any other non-ASCII
+ * byte is written on its own as `\xNN`.
+ */
+static void
+pm_regexp_format_for_error(pm_buffer_t *buffer, const pm_encoding_t *encoding, const uint8_t *source, size_t length) {
+    if (encoding == PM_ENCODING_UTF_8_ENTRY) {
+        pm_buffer_append_string(buffer, (const char *) source, length);
+        return;
+    }
+
+    for (size_t offset = 0; offset < length; ) {
+        const uint8_t byte = source[offset];
+
+        if (byte < 0x80) {
+            pm_buffer_append_byte(buffer, byte);
+            offset++;
+            continue;
+        }
+
+        // Only multibyte encodings are asked for a character width; for
+        // single-byte encodings every non-ASCII byte is escaped individually.
+        size_t width = 0;
+        if (encoding->multibyte) {
+            width = encoding->char_width(source + offset, (ptrdiff_t) (length - offset));
+        }
+
+        if (width <= 1) {
+            pm_buffer_append_format(buffer, "\\x%02X", byte);
+            offset++;
+            continue;
+        }
+
+        pm_buffer_append_string(buffer, "\\x{", 3);
+        for (size_t part = 0; part < width; part++) {
+            pm_buffer_append_format(buffer, "%02X", source[offset + part]);
+        }
+        pm_buffer_append_byte(buffer, '}');
+        offset += width;
+    }
+}
+
+/**
+ * Emit an encoding validation error on the regexp node.
+ *
+ * Unlike pm_regexp_parse_error, the location always covers the whole regexp
+ * node (from `node_start` to `node_end`), whether or not the source is
+ * shared. The macro expands to a single call expression without a trailing
+ * semicolon. `parser` is expanded several times, so it should not have side
+ * effects, and at least one variadic argument has to follow `diag_id`.
+ */
+#define PM_REGEXP_ENCODING_ERROR(parser, diag_id, ...) \
+ pm_diagnostic_list_append_format( \
+ &(parser)->parser->metadata_arena, \
+ &(parser)->parser->error_list, \
+ (uint32_t) ((parser)->node_start - (parser)->parser->start), \
+ (uint32_t) ((parser)->node_end - (parser)->node_start), \
+ diag_id, __VA_ARGS__)
+
+/**
+ * Validate encoding for a regexp with an encoding modifier (/e, /s, /u, /n).
+ *
+ * `modifier` is the modifier character and `modifier_encoding` the encoding
+ * it selects. `source_start` and `source_length` describe the regexp source
+ * text and are passed to the diagnostics as a (length, pointer) pair.
+ *
+ * Returns PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING for an
+ * ASCII-only /n regexp without any escape-set encoding, and `flags` unchanged
+ * in every other case. All problems are reported as diagnostics on the regexp
+ * node.
+ *
+ * The decision tree is:
+ *
+ * 1. No escape-set encoding (explicit_encoding == NULL):
+ * a. ASCII-only content: validate property escapes, return forced US-ASCII
+ * for /n or the modifier flags for others.
+ * b. US-ASCII source with non-ASCII literals: emit per-byte errors.
+ * c. Source encoding differs from modifier encoding: emit mismatch error
+ * (plus a non-escaped multibyte character error for /n).
+ *
+ * 2. Mixed \u and \x escapes: for /n, emit the appropriate conflict error
+ * depending on which escape type was last; for the other modifiers,
+ * validate the hex escape bytes against the modifier's encoding.
+ *
+ * 3. \u escape with non-/u modifier: incompatible encoding error.
+ *
+ * 4. Validate that hex escape byte sequences form valid multibyte characters
+ * in the modifier's encoding (skipped for /n).
+ */
+static pm_node_flags_t
+pm_regexp_validate_encoding_modifier(pm_regexp_parser_t *parser, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding, const char *source_start, int source_length) {
+
+ if (parser->explicit_encoding == NULL) {
+ if (ascii_only) {
+ // Check property escapes against the modifier's encoding tier.
+ // /n (ASCII-8BIT): only POSIX properties are valid.
+ // /e, /s: POSIX and script properties are valid.
+ // /u: all properties are valid.
+ if (modifier == 'n' && parser->property_name != NULL) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INVALID_CHAR_PROPERTY,
+ (int) parser->property_name_length, (const char *) parser->property_name,
+ source_length, source_start);
+ } else if (modifier != 'u' && parser->has_unicode_property_escape) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INVALID_CHAR_PROPERTY,
+ (int) parser->unicode_property_name_length, (const char *) parser->unicode_property_name,
+ source_length, source_start);
+ }
+ return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
+ }
+
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ for (uint32_t i = 0; i < parser->non_ascii_literal_count; i++) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+ }
+ } else if (parser->encoding != modifier_encoding) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
+
+ if (modifier == 'n' && !ascii_only) { // ascii_only is always false here; that case returned above
+ pm_buffer_t formatted = { 0 };
+ pm_regexp_format_for_error(&formatted, parser->encoding, (const uint8_t *) source_start, (size_t) source_length);
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) formatted.length, (const char *) formatted.value);
+ pm_buffer_cleanup(&formatted);
+ }
+ }
+
+ return flags;
+ }
+
+ // Mixed unicode + hex escapes.
+ if (parser->has_unicode_escape && parser->has_hex_escape) {
+ if (modifier == 'n') {
+ if (parser->last_escape_was_unicode) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP, source_length, source_start);
+ } else {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8, source_length, source_start);
+ }
+ } else {
+ if (!pm_regexp_validate_hex_escapes(modifier_encoding, &parser->hex_escape_buffer)) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+ }
+ }
+
+ return flags;
+ }
+
+ if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ if (parser->last_escape_was_unicode) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, source_length, source_start);
+ } else if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) { // same diagnostic as the branch above
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, source_length, source_start);
+ }
+ }
+
+ if (modifier != 'n' && !pm_regexp_validate_hex_escapes(modifier_encoding, &parser->hex_escape_buffer)) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+ }
+
+ return flags;
+}
+
+/**
+ * Validate the encoding of a regexp and compute the encoding flags to set on
+ * the node. Regexps with an encoding modifier are handed over to
+ * pm_regexp_validate_encoding_modifier; the remaining steps cover regexps
+ * without one.
+ *
+ * `source_start` and `source_length` describe the regexp source text and are
+ * passed to the diagnostics as a (length, pointer) pair.
+ *
+ * The checks are performed in this order:
+ *
+ * 1. Invalid multibyte characters were found during parsing: return `flags`
+ * unchanged (the errors were already emitted at the byte locations).
+ * 2. A `\u` escape with an invalid range was found: emit the invalid Unicode
+ * range error and return `flags` unchanged.
+ * 3. If a modifier (/n, /u, /e, /s) is present, delegate to
+ * pm_regexp_validate_encoding_modifier and return its result.
+ * 4. US-ASCII source with non-ASCII literals and no escape-set encoding: emit
+ * per-byte errors.
+ * 5. ASCII-only content: return forced US-ASCII (or forced UTF-8 if \p{...}).
+ * 6. Escape-set encoding present: validate hex escapes against the target
+ * encoding, handle mixed \u + \x conflicts, and return the appropriate
+ * forced encoding flag.
+ *
+ * When none of the steps selects a forced encoding, 0 is returned.
+ */
+static pm_node_flags_t
+pm_regexp_validate_encoding(pm_regexp_parser_t *parser, bool ascii_only, pm_node_flags_t flags, const char *source_start, int source_length) {
+
+ // Invalid multibyte characters suppress further validation.
+ // Errors were already emitted at the byte locations during parsing.
+ if (parser->has_invalid_multibyte) {
+ return flags;
+ }
+
+ if (parser->invalid_unicode_range) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, source_length, source_start);
+ return flags;
+ }
+
+ // Check modifier flags first.
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
+ return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY, source_start, source_length);
+ }
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
+ return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY, source_start, source_length);
+ }
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
+ return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY, source_start, source_length);
+ }
+ if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
+ return pm_regexp_validate_encoding_modifier(parser, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY, source_start, source_length);
+ }
+
+ // No modifier — check for non-ASCII literals in US-ASCII encoding.
+ if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
+ for (uint32_t i = 0; i < parser->non_ascii_literal_count; i++) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
+ }
+ }
+
+ // ASCII-only regexps get downgraded to US-ASCII, unless property escapes
+ // force UTF-8.
+ if (ascii_only) {
+ if (parser->has_property_escape) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+ }
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
+ }
+
+ // Check explicit encoding from escape sequences.
+ if (parser->explicit_encoding != NULL) {
+ // Mixed unicode + hex escapes without modifier.
+ if (parser->has_unicode_escape && parser->has_hex_escape && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
+ if (parser->encoding != PM_ENCODING_US_ASCII_ENTRY &&
+ parser->encoding != PM_ENCODING_ASCII_8BIT_ENTRY &&
+ !pm_regexp_validate_hex_escapes(parser->encoding, &parser->hex_escape_buffer)) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+ } else if (parser->last_escape_was_unicode) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP, source_length, source_start);
+ } else {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8, source_length, source_start);
+ }
+
+ return 0; // exactly one of the three errors above has been emitted
+ }
+
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ if (!pm_regexp_validate_hex_escapes(parser->explicit_encoding, &parser->hex_escape_buffer)) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+ }
+
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
+ } else {
+ if (!pm_regexp_validate_hex_escapes(parser->explicit_encoding, &parser->hex_escape_buffer)) {
+ PM_REGEXP_ENCODING_ERROR(parser, PM_ERR_INVALID_MULTIBYTE_ESCAPE, source_length, source_start);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Parse a regular expression, validate its encoding, and optionally extract
+ * named capture groups. Encoding validation walks the raw source (content_loc)
+ * to distinguish escape-produced bytes from literal bytes. Named capture
+ * extraction walks the unescaped content since escape sequences in group names
+ * (e.g., line continuations) have already been processed by the lexer.
+ */
+pm_node_flags_t
+pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) {
+ const uint8_t *source = parser->start + node->content_loc.start;
+ size_t size = node->content_loc.length;
+ bool extended_mode = PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED);
+ pm_node_flags_t flags = PM_NODE_FLAGS(node);
+
+ const uint8_t *node_start = parser->start + node->base.location.start;
+ const uint8_t *node_end = parser->start + node->base.location.start + node->base.location.length;
+
+ // First pass: walk raw source for encoding validation (no name extraction).
+ pm_regexp_parser_t regexp_parser = {
+ .parser = parser,
+ .start = source,
+ .cursor = source,
+ .end = source + size,
+ .extended_mode = extended_mode,
+ .encoding_changed = parser->encoding_changed,
+ .encoding = parser->encoding,
+ .name_callback = NULL,
+ .name_data = NULL,
+ .shared = true,
+ .node_start = node_start,
+ .node_end = node_end,
+ .has_unicode_escape = false,
+ .has_hex_escape = false,
+ .last_escape_was_unicode = false,
+ .explicit_encoding = NULL,
+ .has_property_escape = false,
+ .has_unicode_property_escape = false,
+ .property_name = NULL,
+ .property_name_length = 0,
+ .unicode_property_name = NULL,
+ .unicode_property_name_length = 0,
+ .non_ascii_literal_count = 0,
+ .invalid_unicode_range = false,
+ .hex_escape_buffer = { 0 },
+ .hex_group_active = false,
+ .has_invalid_multibyte = false,
+ };
+
+ pm_regexp_parse_pattern(&regexp_parser);
+
+ // Compute ascii_only from the regexp parser's tracked state. We cannot
+ // use node->unescaped for this because regexp unescaped content preserves
+ // escape text (e.g., \x80 is 4 ASCII chars), not the binary values.
+ bool ascii_only = !regexp_parser.has_hex_escape && !regexp_parser.has_unicode_escape && regexp_parser.non_ascii_literal_count == 0;
+ // Use the unescaped content for error messages to match CRuby's format,
+ // where Ruby escapes like \M-\C-? are resolved to bytes but regexp escapes
+ // like \u{80} are preserved as text.
+ const char *error_source = (const char *) pm_string_source(&node->unescaped);
+ int error_source_length = (int) pm_string_length(&node->unescaped);
+ pm_node_flags_t encoding_flags = pm_regexp_validate_encoding(&regexp_parser, ascii_only, flags, error_source, error_source_length);
+ pm_buffer_cleanup(&regexp_parser.hex_escape_buffer);
+
+ // Second pass: walk unescaped content for named capture extraction.
+ if (name_callback != NULL) {
+ bool shared = node->unescaped.type == PM_STRING_SHARED;
+ pm_regexp_parse_named_captures(parser, pm_string_source(&node->unescaped), pm_string_length(&node->unescaped), shared, extended_mode, name_callback, name_data);
+ }
+
+ return encoding_flags;
+}
+
+/**
+ * Parse regular expression content for named capture groups only. Besides
+ * pm_regexp_parse's second pass, this serves =~ with interpolated regexps,
+ * which have no pm_regular_expression_node_t. No encoding is validated.
+ *
+ * Note: The encoding-tracking fields (has_unicode_escape, has_hex_escape, etc.)
+ * are initialized but not used for the result. They exist because the parsing
+ * functions (pm_regexp_parse_backslash_escape, etc.) unconditionally update
+ * them as they walk through the content.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
- pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
+void
+pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) {
+ pm_regexp_parser_t regexp_parser = {
.parser = parser,
.start = source,
.cursor = source,
@@ -784,7 +1692,26 @@ pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool ex
.encoding = parser->encoding,
.name_callback = name_callback,
.name_data = name_data,
- .error_callback = error_callback,
- .error_data = error_data
- });
+ .shared = shared,
+ .node_start = source,
+ .node_end = source + size,
+ .has_unicode_escape = false,
+ .has_hex_escape = false,
+ .last_escape_was_unicode = false,
+ .explicit_encoding = NULL,
+ .has_property_escape = false,
+ .has_unicode_property_escape = false,
+ .property_name = NULL,
+ .property_name_length = 0,
+ .unicode_property_name = NULL,
+ .unicode_property_name_length = 0,
+ .non_ascii_literal_count = 0,
+ .invalid_unicode_range = false,
+ .hex_escape_buffer = { 0 },
+ .hex_group_active = false,
+ .has_invalid_multibyte = false,
+ };
+
+ pm_regexp_parse_pattern(&regexp_parser);
+ pm_buffer_cleanup(&regexp_parser.hex_escape_buffer);
}
diff --git a/prism/regexp.h b/prism/regexp.h
deleted file mode 100644
index c0b3163e93..0000000000
--- a/prism/regexp.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * @file regexp.h
- *
- * A regular expression parser.
- */
-#ifndef PRISM_REGEXP_H
-#define PRISM_REGEXP_H
-
-#include "prism/defines.h"
-#include "prism/parser.h"
-#include "prism/encoding.h"
-#include "prism/util/pm_memchr.h"
-#include "prism/util/pm_string.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <string.h>
-
-/**
- * This callback is called when a named capture group is found.
- */
-typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
-
-/**
- * This callback is called when a parse error is found.
- */
-typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
-
-/**
- * Parse a regular expression.
- *
- * @param parser The parser that is currently being used.
- * @param source The source code to parse.
- * @param size The size of the source code.
- * @param extended_mode Whether to parse the regular expression in extended mode.
- * @param name_callback The optional callback to call when a named capture group is found.
- * @param name_data The optional data to pass to the name callback.
- * @param error_callback The callback to call when a parse error is found.
- * @param error_data The data to pass to the error callback.
- */
-PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
-
-#endif
diff --git a/prism/serialize.h b/prism/serialize.h
new file mode 100644
index 0000000000..786a1514bc
--- /dev/null
+++ b/prism/serialize.h
@@ -0,0 +1,96 @@
+/**
+ * @file serialize.h
+ *
+ * The functions related to serializing the AST to a binary format.
+ */
+#ifndef PRISM_SERIALIZE_H
+#define PRISM_SERIALIZE_H
+
+#include "prism/excludes.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifndef PRISM_EXCLUDE_SERIALIZATION
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/buffer.h"
+#include "prism/parser.h"
+#include "prism/source.h"
+#include "prism/stream.h"
+
+/**
+ * Serialize the AST represented by the given node to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) PRISM_NONNULL(1, 2, 3);
+
+/**
+ * Parse the given source to the AST and dump the AST to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse and serialize the AST represented by the given source into the given
+ * buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse and serialize the comments in the given source to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Lex the given source and serialize to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to lex.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the lexer.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ *
+ * @param buffer The buffer to serialize to.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ */
+PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1, 2);
+
+/**
+ * Parse the source and return true if it parses without errors or warnings.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param data The optional data to pass to the parser.
+ * @returns True if the source parses without errors or warnings.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) PRISM_NONNULL(1);
+
+#endif
+
+#endif
diff --git a/prism/source.c b/prism/source.c
new file mode 100644
index 0000000000..f61cb19c1b
--- /dev/null
+++ b/prism/source.c
@@ -0,0 +1,491 @@
+#include "prism/internal/source.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/buffer.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* The following headers are necessary to read files using demand paging. */
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(_POSIX_MAPPED_FILES)
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#elif defined(PRISM_HAS_FILESYSTEM)
+#include <fcntl.h>
+#include <sys/stat.h>
+#endif
+
+static const uint8_t empty_source[] = "";
+
+/**
+ * Allocate and initialize a pm_source_t with the given fields.
+ */
+static pm_source_t *
+pm_source_alloc(const uint8_t *source, size_t length, pm_source_type_t type) {
+ pm_source_t *result = xmalloc(sizeof(pm_source_t));
+ if (result == NULL) abort();
+
+ *result = (struct pm_source_t) {
+ .source = source,
+ .length = length,
+ .type = type
+ };
+
+ return result;
+}
+
+/**
+ * Create a new source that wraps existing constant memory.
+ */
+pm_source_t *
+pm_source_constant_new(const uint8_t *data, size_t length) {
+ return pm_source_alloc(data, length, PM_SOURCE_CONSTANT);
+}
+
+/**
+ * Create a new source that wraps existing shared memory.
+ */
+pm_source_t *
+pm_source_shared_new(const uint8_t *data, size_t length) {
+ return pm_source_alloc(data, length, PM_SOURCE_SHARED);
+}
+
+/**
+ * Create a new source that owns its memory.
+ */
+pm_source_t *
+pm_source_owned_new(uint8_t *data, size_t length) {
+ return pm_source_alloc(data, length, PM_SOURCE_OWNED);
+}
+
+#ifdef _WIN32
+/**
+ * Represents a file handle on Windows, where the path will need to be freed
+ * when the file is closed.
+ */
+typedef struct {
+ /** The path to the file, which will become allocated memory. */
+ WCHAR *path;
+
+ /** The size of the allocated path in bytes. */
+ size_t path_size;
+
+ /** The handle to the file, which will start as uninitialized memory. */
+ HANDLE file;
+} pm_source_file_handle_t;
+
+/**
+ * Open the file indicated by the filepath parameter for reading on Windows.
+ */
+static pm_source_init_result_t
+pm_source_file_handle_open(pm_source_file_handle_t *handle, const char *filepath) {
+ int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
+ if (length == 0) return PM_SOURCE_INIT_ERROR_GENERIC;
+
+ handle->path_size = sizeof(WCHAR) * ((size_t) length);
+ handle->path = xmalloc(handle->path_size);
+ if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
+ xfree_sized(handle->path, handle->path_size);
+ return PM_SOURCE_INIT_ERROR_GENERIC;
+ }
+
+ handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
+ if (handle->file == INVALID_HANDLE_VALUE) {
+ pm_source_init_result_t result = PM_SOURCE_INIT_ERROR_GENERIC;
+
+ if (GetLastError() == ERROR_ACCESS_DENIED) {
+ DWORD attributes = GetFileAttributesW(handle->path);
+ if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+ result = PM_SOURCE_INIT_ERROR_DIRECTORY;
+ }
+ }
+
+ xfree_sized(handle->path, handle->path_size);
+ return result;
+ }
+
+ return PM_SOURCE_INIT_SUCCESS;
+}
+
+/**
+ * Close the file handle and free the path.
+ */
+static void
+pm_source_file_handle_close(pm_source_file_handle_t *handle) {
+ xfree_sized(handle->path, handle->path_size);
+ CloseHandle(handle->file);
+}
+#endif
+
+/**
+ * Create a new source by memory-mapping a file.
+ */
+pm_source_t *
+pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) {
+#ifdef _WIN32
+ (void) open_flags;
+
+ /* Open the file for reading. */
+ pm_source_file_handle_t handle;
+ *result = pm_source_file_handle_open(&handle, filepath);
+ if (*result != PM_SOURCE_INIT_SUCCESS) return NULL;
+
+ /* Get the file size. */
+ DWORD file_size = GetFileSize(handle.file, NULL);
+ if (file_size == INVALID_FILE_SIZE) {
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* If the file is empty, then return a constant source. */
+ if (file_size == 0) {
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+ }
+
+ /* Create a mapping of the file. */
+ HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (mapping == NULL) {
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Map the file into memory. */
+ uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+ CloseHandle(mapping);
+ pm_source_file_handle_close(&handle);
+
+ if (source == NULL) {
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(source, (size_t) file_size, PM_SOURCE_MAPPED);
+#elif defined(_POSIX_MAPPED_FILES)
+ /* Open the file for reading. */
+ int fd = open(filepath, O_RDONLY | open_flags);
+ if (fd == -1) {
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Stat the file to get the file size. */
+ struct stat sb;
+ if (fstat(fd, &sb) == -1) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Ensure it is a file and not a directory. */
+ if (S_ISDIR(sb.st_mode)) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_DIRECTORY;
+ return NULL;
+ }
+
+ /*
+ * For non-regular files (pipes, character devices), return a specific
+ * error so the caller can handle reading through their own I/O layer.
+ */
+ if (!S_ISREG(sb.st_mode)) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_NON_REGULAR;
+ return NULL;
+ }
+
+ /* mmap the file descriptor to virtually get the contents. */
+ size_t size = (size_t) sb.st_size;
+
+ if (size == 0) {
+ close(fd);
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+ }
+
+ uint8_t *source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (source == MAP_FAILED) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ close(fd);
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(source, size, PM_SOURCE_MAPPED);
+#else
+ (void) open_flags;
+ return pm_source_file_new(filepath, result);
+#endif
+}
+
+/**
+ * Create a new source by reading a file into a heap-allocated buffer.
+ */
+pm_source_t *
+pm_source_file_new(const char *filepath, pm_source_init_result_t *result) {
+#ifdef _WIN32
+ /* Open the file for reading. */
+ pm_source_file_handle_t handle;
+ *result = pm_source_file_handle_open(&handle, filepath);
+ if (*result != PM_SOURCE_INIT_SUCCESS) return NULL;
+
+ /* Get the file size. */
+ const DWORD file_size = GetFileSize(handle.file, NULL);
+ if (file_size == INVALID_FILE_SIZE) {
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* If the file is empty, return a constant source. */
+ if (file_size == 0) {
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+ }
+
+ /* Create a buffer to read the file into. */
+ uint8_t *source = xmalloc(file_size);
+ if (source == NULL) {
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Read the contents of the file. */
+ DWORD bytes_read;
+ if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
+ xfree_sized(source, file_size);
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Check the number of bytes read. */
+ if (bytes_read != file_size) {
+ xfree_sized(source, file_size);
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ pm_source_file_handle_close(&handle);
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(source, (size_t) file_size, PM_SOURCE_OWNED);
+#elif defined(PRISM_HAS_FILESYSTEM)
+ /* Open the file for reading. */
+ int fd = open(filepath, O_RDONLY);
+ if (fd == -1) {
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Stat the file to get the file size. */
+ struct stat sb;
+ if (fstat(fd, &sb) == -1) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ /* Ensure it is a file and not a directory. */
+ if (S_ISDIR(sb.st_mode)) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_DIRECTORY;
+ return NULL;
+ }
+
+ /* Check the size to see if it's empty. */
+ size_t size = (size_t) sb.st_size;
+ if (size == 0) {
+ close(fd);
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(empty_source, 0, PM_SOURCE_CONSTANT);
+ }
+
+ const size_t length = (size_t) size;
+ uint8_t *source = xmalloc(length);
+ if (source == NULL) {
+ close(fd);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ ssize_t bytes_read = read(fd, source, length);
+ close(fd);
+
+ if (bytes_read == -1 || (size_t) bytes_read != length) {
+ xfree_sized(source, length);
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ return NULL;
+ }
+
+ *result = PM_SOURCE_INIT_SUCCESS;
+ return pm_source_alloc(source, length, PM_SOURCE_OWNED);
+#else
+ (void) filepath;
+ *result = PM_SOURCE_INIT_ERROR_GENERIC;
+ perror("pm_source_file_new is not implemented for this platform");
+ return NULL;
+#endif
+}
+
+/**
+ * Create a new source by reading from a stream. This allocates the source
+ * but does not read from the stream yet. Use pm_source_stream_read to read
+ * data.
+ */
+pm_source_t *
+pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof) {
+ pm_source_t *source = pm_source_alloc(NULL, 0, PM_SOURCE_STREAM);
+ source->stream.buffer = pm_buffer_new();
+ source->stream.stream = stream;
+ source->stream.fgets = fgets;
+ source->stream.feof = feof;
+ source->stream.eof = false;
+
+ return source;
+}
+
+/**
+ * Read from the stream into the source's internal buffer until __END__ is
+ * encountered or EOF is reached. Updates the source pointer and length.
+ *
+ * Returns true if EOF was reached, false if __END__ was encountered.
+ */
+bool
+pm_source_stream_read(pm_source_t *source) {
+ pm_buffer_t *buffer = source->stream.buffer;
+
+#define LINE_SIZE 4096
+ char line[LINE_SIZE];
+
+ while (memset(line, '\n', LINE_SIZE), source->stream.fgets(line, LINE_SIZE, source->stream.stream) != NULL) {
+ size_t length = LINE_SIZE;
+ while (length > 0 && line[length - 1] == '\n') length--;
+
+ if (length == LINE_SIZE) {
+ /*
+ * If we read a line that is the maximum size and it doesn't end
+ * with a newline, then we'll just append it to the buffer and
+ * continue reading.
+ */
+ length--;
+ pm_buffer_append_string(buffer, line, length);
+ continue;
+ }
+
+ /* Append the line to the buffer. */
+ length--;
+ pm_buffer_append_string(buffer, line, length);
+
+ /*
+ * Check if the line matches the __END__ marker. If it does, then stop
+ * reading and return false. In most circumstances, this means we should
+ * stop reading from the stream so that the DATA constant can pick it
+ * up.
+ */
+ switch (length) {
+ case 7:
+ if (strncmp(line, "__END__", 7) == 0) {
+ source->source = (const uint8_t *) pm_buffer_value(buffer);
+ source->length = pm_buffer_length(buffer);
+ return false;
+ }
+ break;
+ case 8:
+ if (strncmp(line, "__END__\n", 8) == 0) {
+ source->source = (const uint8_t *) pm_buffer_value(buffer);
+ source->length = pm_buffer_length(buffer);
+ return false;
+ }
+ break;
+ case 9:
+ if (strncmp(line, "__END__\r\n", 9) == 0) {
+ source->source = (const uint8_t *) pm_buffer_value(buffer);
+ source->length = pm_buffer_length(buffer);
+ return false;
+ }
+ break;
+ }
+
+ /*
+ * All data is read via the fgets callback. Once a complete line has
+ * been appended, ask the feof callback whether the stream is exhausted.
+ */
+ if (source->stream.feof(source->stream.stream)) {
+ break;
+ }
+ }
+
+#undef LINE_SIZE
+
+ source->stream.eof = true;
+ source->source = (const uint8_t *) pm_buffer_value(buffer);
+ source->length = pm_buffer_length(buffer);
+ return true;
+}
+
+/**
+ * Returns whether the stream source has reached EOF.
+ */
+bool
+pm_source_stream_eof(const pm_source_t *source) {
+ return source->stream.eof;
+}
+
+/**
+ * Free the given source and any memory it owns.
+ */
+void
+pm_source_free(pm_source_t *source) {
+ switch (source->type) {
+ case PM_SOURCE_CONSTANT:
+ case PM_SOURCE_SHARED:
+ /* No cleanup needed for the data. */
+ break;
+ case PM_SOURCE_OWNED:
+ xfree_sized((void *) source->source, source->length);
+ break;
+ case PM_SOURCE_MAPPED:
+#if defined(_WIN32)
+ if (source->length > 0) {
+ UnmapViewOfFile((void *) source->source);
+ }
+#elif defined(_POSIX_MAPPED_FILES)
+ if (source->length > 0) {
+ munmap((void *) source->source, source->length);
+ }
+#endif
+ break;
+ case PM_SOURCE_STREAM:
+ pm_buffer_free(source->stream.buffer);
+ break;
+ }
+
+ xfree_sized(source, sizeof(pm_source_t));
+}
+
+/**
+ * Returns the length of the source data in bytes.
+ */
+size_t
+pm_source_length(const pm_source_t *source) {
+ return source->length;
+}
+
+/**
+ * Returns a pointer to the source data.
+ */
+const uint8_t *
+pm_source_source(const pm_source_t *source) {
+ return source->source;
+}
diff --git a/prism/source.h b/prism/source.h
new file mode 100644
index 0000000000..c79987d3fb
--- /dev/null
+++ b/prism/source.h
@@ -0,0 +1,148 @@
+/**
+ * @file source.h
+ *
+ * An opaque type representing the source code being parsed, regardless of
+ * origin (constant memory, file, memory-mapped file, or stream).
+ */
+#ifndef PRISM_SOURCE_H
+#define PRISM_SOURCE_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/filesystem.h"
+#include "prism/compiler/nodiscard.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * An opaque type representing source code being parsed.
+ */
+typedef struct pm_source_t pm_source_t;
+
+/**
+ * This function is used to retrieve a line of input from a stream. It closely
+ * mirrors that of fgets so that fgets can be used as the default
+ * implementation.
+ */
+typedef char * (pm_source_stream_fgets_t)(char *string, int size, void *stream);
+
+/**
+ * This function is used to check whether a stream is at EOF. It closely mirrors
+ * that of feof so that feof can be used as the default implementation.
+ */
+typedef int (pm_source_stream_feof_t)(void *stream);
+
+/**
+ * Represents the result of initializing a source from a file.
+ */
+typedef enum {
+ /** Indicates that the source was successfully initialized. */
+ PM_SOURCE_INIT_SUCCESS = 0,
+
+ /**
+ * Indicates a generic error from a source init function, where the type
+ * of error should be read from `errno` or `GetLastError()`.
+ */
+ PM_SOURCE_INIT_ERROR_GENERIC = 1,
+
+ /**
+ * Indicates that the file that was attempted to be opened was a directory.
+ */
+ PM_SOURCE_INIT_ERROR_DIRECTORY = 2,
+
+ /**
+ * Indicates that the file is not a regular file (e.g. a pipe or character
+ * device) and the caller should handle reading it.
+ */
+ PM_SOURCE_INIT_ERROR_NON_REGULAR = 3
+} pm_source_init_result_t;
+
+/**
+ * Create a new source that wraps existing constant memory. The memory is not
+ * owned and will not be freed.
+ *
+ * @param data The pointer to the source data.
+ * @param length The length of the source data in bytes.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_constant_new(const uint8_t *data, size_t length);
+
+/**
+ * Create a new source that wraps existing shared memory. The memory is not
+ * owned and will not be freed. Semantically a "slice" of another source.
+ *
+ * @param data The pointer to the source data.
+ * @param length The length of the source data in bytes.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_shared_new(const uint8_t *data, size_t length);
+
+/**
+ * Create a new source that owns its memory. The memory will be freed with
+ * xfree_sized (using the given length) when the source is freed.
+ *
+ * @param data The pointer to the heap-allocated source data.
+ * @param length The length of the source data in bytes.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_owned_new(uint8_t *data, size_t length);
+
+/**
+ * Create a new source by reading a file into a heap-allocated buffer.
+ *
+ * @param filepath The path to the file to read.
+ * @param result Out parameter for the result of the initialization.
+ * @returns A new source, or NULL on error (with result written to out param).
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_file_new(const char *filepath, pm_source_init_result_t *result) PRISM_NONNULL(1, 2);
+
+/**
+ * Create a new source by memory-mapping a file. Falls back to file reading on
+ * platforms without mmap support.
+ *
+ * If the file is a non-regular file (e.g. a pipe or character device),
+ * PM_SOURCE_INIT_ERROR_NON_REGULAR is returned, allowing the caller to handle
+ * it appropriately (e.g. by reading it through their own I/O layer).
+ *
+ * @param filepath The path to the file to read.
+ * @param open_flags Extra open(2) flags (e.g. O_NONBLOCK); POSIX mmap path only.
+ * @param result Out parameter for the result of the initialization.
+ * @returns A new source, or NULL on error (with result written to out param).
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_mapped_new(const char *filepath, int open_flags, pm_source_init_result_t *result) PRISM_NONNULL(1, 3);
+
+/**
+ * Create a new source by reading from a stream using the provided callbacks.
+ *
+ * @param stream The stream to read from.
+ * @param fgets The function to use to read from the stream.
+ * @param feof The function to use to check if the stream is at EOF.
+ * @returns A new source. Aborts on allocation failure.
+ */
+PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_source_t * pm_source_stream_new(void *stream, pm_source_stream_fgets_t *fgets, pm_source_stream_feof_t *feof);
+
+/**
+ * Free the given source and any memory it owns.
+ *
+ * @param source The source to free.
+ */
+PRISM_EXPORTED_FUNCTION void pm_source_free(pm_source_t *source) PRISM_NONNULL(1);
+
+/**
+ * Returns the length of the source data in bytes.
+ *
+ * @param source The source to get the length of.
+ * @returns The length of the source data.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_source_length(const pm_source_t *source) PRISM_NONNULL(1);
+
+/**
+ * Returns a pointer to the source data.
+ *
+ * @param source The source to get the data of.
+ * @returns A pointer to the source data.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_source_source(const pm_source_t *source) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/srcs.mk b/prism/srcs.mk
new file mode 100644
index 0000000000..93ad8f579f
--- /dev/null
+++ b/prism/srcs.mk
@@ -0,0 +1,160 @@
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs uncommon.mk: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+ touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+ $(PRISM_SRCDIR)/templates/template.rb \
+ $(PRISM_SRCDIR)/srcs.mk.in
+ $(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/srcs.mk $(PRISM_SRCDIR)/srcs.mk.in
+
+distclean-prism-srcs::
+ $(RM) prism/.srcs.mk.time
+ $(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+ $(RM) $(PRISM_SRCDIR)/srcs.mk
+
+realclean-srcs-local:: realclean-prism-srcs
+
+main srcs: prism-srcs
+main incs: prism-incs
+
+prism-srcs: $(srcdir)/prism/api_node.c
+$(srcdir)/prism/api_node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/ext/prism/api_node.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) ext/prism/api_node.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/api_node.c
+
+prism-incs: $(srcdir)/prism/ast.h
+$(srcdir)/prism/ast.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/ast.h.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/ast.h $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/ast.h
+
+prism-incs: $(srcdir)/prism/internal/diagnostic.h
+$(srcdir)/prism/internal/diagnostic.h: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/include/prism/internal/diagnostic.h.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) include/prism/internal/diagnostic.h $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/internal/diagnostic.h
+
+prism-srcs: $(srcdir)/lib/prism/compiler.rb
+$(srcdir)/lib/prism/compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/compiler.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/compiler.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/compiler.rb
+
+prism-srcs: $(srcdir)/lib/prism/dispatcher.rb
+$(srcdir)/lib/prism/dispatcher.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dispatcher.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dispatcher.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/dispatcher.rb
+
+prism-srcs: $(srcdir)/lib/prism/dot_visitor.rb
+$(srcdir)/lib/prism/dot_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dot_visitor.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dot_visitor.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/dot_visitor.rb
+
+prism-srcs: $(srcdir)/lib/prism/dsl.rb
+$(srcdir)/lib/prism/dsl.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/dsl.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/dsl.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/dsl.rb
+
+prism-srcs: $(srcdir)/lib/prism/inspect_visitor.rb
+$(srcdir)/lib/prism/inspect_visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/inspect_visitor.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/inspect_visitor.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/inspect_visitor.rb
+
+prism-srcs: $(srcdir)/lib/prism/mutation_compiler.rb
+$(srcdir)/lib/prism/mutation_compiler.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/mutation_compiler.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/mutation_compiler.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/mutation_compiler.rb
+
+prism-srcs: $(srcdir)/lib/prism/node.rb
+$(srcdir)/lib/prism/node.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/node.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/node.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/node.rb
+
+prism-srcs: $(srcdir)/lib/prism/reflection.rb
+$(srcdir)/lib/prism/reflection.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/reflection.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/reflection.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/reflection.rb
+
+prism-srcs: $(srcdir)/lib/prism/serialize.rb
+$(srcdir)/lib/prism/serialize.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/serialize.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/serialize.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/serialize.rb
+
+prism-srcs: $(srcdir)/lib/prism/visitor.rb
+$(srcdir)/lib/prism/visitor.rb: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/lib/prism/visitor.rb.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) lib/prism/visitor.rb $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/lib/prism/visitor.rb
+
+prism-srcs: $(srcdir)/prism/diagnostic.c
+$(srcdir)/prism/diagnostic.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/diagnostic.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/diagnostic.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/diagnostic.c
+
+prism-srcs: $(srcdir)/prism/json.c
+$(srcdir)/prism/json.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/json.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/json.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/json.c
+
+prism-srcs: $(srcdir)/prism/node.c
+$(srcdir)/prism/node.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/node.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/node.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/node.c
+
+prism-srcs: $(srcdir)/prism/prettyprint.c
+$(srcdir)/prism/prettyprint.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/prettyprint.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/prettyprint.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/prettyprint.c
+
+prism-srcs: $(srcdir)/prism/serialize.c
+$(srcdir)/prism/serialize.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/serialize.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/serialize.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/serialize.c
+
+prism-srcs: $(srcdir)/prism/tokens.c
+$(srcdir)/prism/tokens.c: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/src/tokens.c.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) src/tokens.c $@
+
+realclean-prism-srcs::
+ $(RM) $(srcdir)/prism/tokens.c
diff --git a/prism/srcs.mk.in b/prism/srcs.mk.in
new file mode 100644
index 0000000000..6149e4ae9d
--- /dev/null
+++ b/prism/srcs.mk.in
@@ -0,0 +1,52 @@
+<% # -*- ruby -*-
+# :stopdoc:
+require_relative 'templates/template'
+
+script = File.basename(__FILE__)
+srcs = output ? File.basename(output) : script.chomp('.in')
+mk = 'uncommon.mk'
+
+# %>
+PRISM_TEMPLATES_DIR = $(PRISM_SRCDIR)/templates
+PRISM_TEMPLATE = $(PRISM_TEMPLATES_DIR)/template.rb
+PRISM_CONFIG = $(PRISM_SRCDIR)/config.yml
+
+srcs <%=%><%=mk%>: prism/.srcs.mk.time
+
+prism/.srcs.mk.time: $(order_only) $(PRISM_BUILD_DIR)/.time
+prism/$(HAVE_BASERUBY:no=.srcs.mk.time):
+ touch $@
+prism/$(HAVE_BASERUBY:yes=.srcs.mk.time): \
+ $(PRISM_SRCDIR)/templates/template.rb \
+ $(PRISM_SRCDIR)/<%=%><%=script%>
+ $(BASERUBY) $(tooldir)/generic_erb.rb -c -t$@ -o $(PRISM_SRCDIR)/<%=%><%=srcs%> $(PRISM_SRCDIR)/<%=%><%=script%>
+
+distclean-prism-srcs::
+ $(RM) prism/.srcs.mk.time
+ $(RMDIRS) prism || $(NULLCMD)
+
+distclean-srcs-local:: distclean-prism-srcs
+
+realclean-prism-srcs:: distclean-prism-srcs
+ $(RM) $(PRISM_SRCDIR)/<%=%><%=srcs%>
+
+realclean-srcs-local:: realclean-prism-srcs
+
+main srcs: prism-srcs
+main incs: prism-incs
+<% Prism::Template::TEMPLATES.map do |t|
+ # Emit one group of rules per template that produces a C source, a C header,
+ # or a Ruby file; templates with any other extension are skipped.
+ /\.(?:[ch]|rb)\z/ =~ t or next
+ # Map the template path to its output path under $(srcdir): a leading "src/"
+ # becomes "prism/", while a leading "ext/" or "include/" is dropped.
+ s = '$(srcdir)/' + t.sub(%r[\A(?:(src)|ext|include)/]) {$1 && 'prism/'}
+ # NOTE(review): the parentheses around "srcdir" are not escaped, so the regex
+ # treats them as a group and looks for the text "$srcdir/prism/" rather than
+ # "$(srcdir)/prism/". This substitution therefore looks like a no-op, which
+ # matches the $(srcdir)/prism/... paths in the checked-in srcs.mk. Confirm
+ # whether escaping the parentheses was intended before changing it, since
+ # that would alter the generated file.
+ s.sub!(%r[\A\$(srcdir)/prism/], '$(PRISM_SRCDIR)/')
+ # Headers hang off the prism-incs target, everything else off prism-srcs.
+ target = s.end_with?('.h') ? 'incs' : 'srcs'
+# %>
+
+prism-<%=%><%=target%>: <%=%><%=s%>
+<%=%><%=s%>: $(PRISM_CONFIG) $(PRISM_TEMPLATE) $(PRISM_TEMPLATES_DIR)/<%=%><%=t%>.erb
+ $(Q) $(BASERUBY) $(PRISM_TEMPLATE) <%=%><%=t%> $@
+
+realclean-prism-srcs::
+ $(RM) <%=%><%=s%>
+<%
+end
+# %>
diff --git a/prism/static_literals.c b/prism/static_literals.c
index 9fa37b999a..9af1eadf5d 100644
--- a/prism/static_literals.c
+++ b/prism/static_literals.c
@@ -1,4 +1,18 @@
-#include "prism/static_literals.h"
+#include "prism/internal/static_literals.h"
+
+#include "prism/compiler/inline.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/isinf.h"
+#include "prism/internal/stringy.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
/**
* A small struct used for passing around a subset of the information that is
@@ -7,7 +21,10 @@
*/
typedef struct {
/** The list of newline offsets to use to calculate line numbers. */
- const pm_newline_list_t *newline_list;
+ const pm_line_offset_list_t *line_offsets;
+
+ /** The start of the source being parsed. */
+ const uint8_t *start;
/** The line number that the parser starts on. */
int32_t start_line;
@@ -16,7 +33,7 @@ typedef struct {
const char *encoding_name;
} pm_static_literals_metadata_t;
-static inline uint32_t
+static PRISM_INLINE uint32_t
murmur_scramble(uint32_t value) {
value *= 0xcc9e2d51;
value = (value << 15) | (value >> 17);
@@ -92,7 +109,7 @@ node_hash(const pm_static_literals_metadata_t *metadata, const pm_node_t *node)
}
case PM_SOURCE_LINE_NODE: {
// Source lines hash their line number.
- const pm_line_column_t line_column = pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line);
+ const pm_line_column_t line_column = pm_line_offset_list_line_column(metadata->line_offsets, node->location.start, metadata->start_line);
const int32_t *value = &line_column.line;
return murmur_hash((const uint8_t *) value, sizeof(int32_t));
}
@@ -180,7 +197,7 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
}
// Finally, free the old node list and update the hash.
- xfree(hash->nodes);
+ xfree_sized(hash->nodes, hash->capacity * sizeof(pm_node_t *));
hash->nodes = new_nodes;
hash->capacity = new_capacity;
}
@@ -218,7 +235,7 @@ pm_node_hash_insert(pm_node_hash_t *hash, const pm_static_literals_metadata_t *m
*/
static void
pm_node_hash_free(pm_node_hash_t *hash) {
- if (hash->capacity > 0) xfree(hash->nodes);
+ if (hash->capacity > 0) xfree_sized(hash->nodes, hash->capacity * sizeof(pm_node_t *));
}
/**
@@ -240,7 +257,7 @@ pm_int64_value(const pm_static_literals_metadata_t *metadata, const pm_node_t *n
return integer->negative ? -value : value;
}
case PM_SOURCE_LINE_NODE:
- return (int64_t) pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line;
+ return (int64_t) pm_line_offset_list_line_column(metadata->line_offsets, node->location.start, metadata->start_line).line;
default:
assert(false && "unreachable");
return 0;
@@ -268,7 +285,7 @@ pm_compare_integer_nodes(const pm_static_literals_metadata_t *metadata, const pm
* A comparison function for comparing two FloatNode instances.
*/
static int
-pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+pm_compare_float_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
const double left_value = ((const pm_float_node_t *) left)->value;
const double right_value = ((const pm_float_node_t *) right)->value;
return PM_NUMERIC_COMPARISON(left_value, right_value);
@@ -327,7 +344,7 @@ pm_string_value(const pm_node_t *node) {
* A comparison function for comparing two nodes that have attached strings.
*/
static int
-pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+pm_compare_string_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
const pm_string_t *left_string = pm_string_value(left);
const pm_string_t *right_string = pm_string_value(right);
return pm_string_compare(left_string, right_string);
@@ -337,7 +354,7 @@ pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata
* A comparison function for comparing two RegularExpressionNode instances.
*/
static int
-pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
+pm_compare_regular_expression_nodes(PRISM_UNUSED const pm_static_literals_metadata_t *metadata, const pm_node_t *left, const pm_node_t *right) {
const pm_regular_expression_node_t *left_regexp = (const pm_regular_expression_node_t *) left;
const pm_regular_expression_node_t *right_regexp = (const pm_regular_expression_node_t *) right;
@@ -353,14 +370,15 @@ pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_static_liter
* Add a node to the set of static literals.
*/
pm_node_t *
-pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
+pm_static_literals_add(const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace) {
switch (PM_NODE_TYPE(node)) {
case PM_INTEGER_NODE:
case PM_SOURCE_LINE_NODE:
return pm_node_hash_insert(
&literals->integer_nodes,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = NULL
},
@@ -372,7 +390,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
return pm_node_hash_insert(
&literals->float_nodes,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = NULL
},
@@ -385,7 +404,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
return pm_node_hash_insert(
&literals->number_nodes,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = NULL
},
@@ -398,7 +418,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
return pm_node_hash_insert(
&literals->string_nodes,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = NULL
},
@@ -410,7 +431,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
return pm_node_hash_insert(
&literals->regexp_nodes,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = NULL
},
@@ -422,7 +444,8 @@ pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line
return pm_node_hash_insert(
&literals->symbol_nodes,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = NULL
},
@@ -492,7 +515,7 @@ pm_static_literal_positive_p(const pm_node_t *node) {
/**
* Create a string-based representation of the given static literal.
*/
-static inline void
+static PRISM_INLINE void
pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_metadata_t *metadata, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
case PM_FALSE_NODE:
@@ -502,12 +525,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
const double value = ((const pm_float_node_t *) node)->value;
if (PRISM_ISINF(value)) {
- if (*node->location.start == '-') {
+ if (metadata->start[node->location.start] == '-') {
pm_buffer_append_byte(buffer, '-');
}
pm_buffer_append_string(buffer, "Infinity", 8);
} else if (value == 0.0) {
- if (*node->location.start == '-') {
+ if (metadata->start[node->location.start] == '-') {
pm_buffer_append_byte(buffer, '-');
}
pm_buffer_append_string(buffer, "0.0", 3);
@@ -576,7 +599,7 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
break;
}
case PM_SOURCE_LINE_NODE:
- pm_buffer_append_format(buffer, "%d", pm_newline_list_line_column(metadata->newline_list, node->location.start, metadata->start_line).line);
+ pm_buffer_append_format(buffer, "%d", pm_line_offset_list_line_column(metadata->line_offsets, node->location.start, metadata->start_line).line);
break;
case PM_STRING_NODE: {
const pm_string_t *unescaped = &((const pm_string_node_t *) node)->unescaped;
@@ -604,11 +627,12 @@ pm_static_literal_inspect_node(pm_buffer_t *buffer, const pm_static_literals_met
* Create a string-based representation of the given static literal.
*/
void
-pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
+pm_static_literal_inspect(pm_buffer_t *buffer, const pm_line_offset_list_t *line_offsets, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node) {
pm_static_literal_inspect_node(
buffer,
&(pm_static_literals_metadata_t) {
- .newline_list = newline_list,
+ .line_offsets = line_offsets,
+ .start = start,
.start_line = start_line,
.encoding_name = encoding_name
},
diff --git a/prism/stream.h b/prism/stream.h
new file mode 100644
index 0000000000..678322b442
--- /dev/null
+++ b/prism/stream.h
@@ -0,0 +1,28 @@
+/**
+ * @file stream.h
+ *
+ * Functions for parsing streams.
+ */
+#ifndef PRISM_STREAM_H
+#define PRISM_STREAM_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include "prism/arena.h"
+#include "prism/options.h"
+#include "prism/parser.h"
+#include "prism/source.h"
+
+/**
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * The parser, arena, and source arguments are annotated PRISM_NONNULL(1, 2, 3).
+ * The options argument is not part of that annotation, so it is presumably
+ * allowed to be NULL -- confirm against the implementation.
+ *
+ * @param parser The out parameter to write the parser to.
+ * @param arena The arena to use for all AST-lifetime allocations.
+ * @param source The source to use, created via pm_source_stream_new.
+ * @param options The optional options to use when parsing.
+ * @returns The AST representing the source.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) PRISM_NONNULL(1, 2, 3);
+
+#endif
diff --git a/prism/string_query.c b/prism/string_query.c
new file mode 100644
index 0000000000..ccedaf9c00
--- /dev/null
+++ b/prism/string_query.c
@@ -0,0 +1,166 @@
+#include "prism/string_query.h"
+
+#include "prism/internal/char.h"
+#include "prism/internal/encoding.h"
+
+#include <assert.h>
+#include <string.h>
+
+/**
+ * The category of slice returned from pm_slice_type. The error value is pinned
+ * to -1, so the remaining categories are numbered upwards from 0.
+ */
+typedef enum {
+ /** Returned when the given encoding name is invalid. */
+ PM_SLICE_TYPE_ERROR = -1,
+
+ /** Returned when no other types apply to the slice (including an empty slice). */
+ PM_SLICE_TYPE_NONE,
+
+ /** Returned when the slice is a valid local variable name. */
+ PM_SLICE_TYPE_LOCAL,
+
+ /** Returned when the slice is a valid constant name. */
+ PM_SLICE_TYPE_CONSTANT,
+
+ /**
+ * Returned when the slice is a valid method name, i.e. an identifier that
+ * is followed by a trailing `!`, `?`, or `=`.
+ */
+ PM_SLICE_TYPE_METHOD_NAME
+} pm_slice_type_t;
+
+/**
+ * Classify the slice as a local variable name, a constant name, a method name
+ * (an identifier with a trailing `!`, `?`, or `=`), or none of these.
+ *
+ * The first character must be alphabetic in the given encoding, an underscore,
+ * or a multibyte character (a byte >= 0x80 with a positive character width).
+ * Every following character must be alphanumeric, an underscore, or multibyte.
+ * An uppercase first character makes the slice a constant; otherwise it is a
+ * local.
+ *
+ * @param source The bytes to classify. Only the first `length` bytes are read;
+ *     the slice does not need to be NUL-terminated.
+ * @param length The number of bytes in the slice.
+ * @param encoding_name The NUL-terminated name of the encoding of the slice.
+ * @returns PM_SLICE_TYPE_ERROR if the encoding name is unknown,
+ *     PM_SLICE_TYPE_NONE if the slice is empty or not a valid name, and the
+ *     matching category otherwise.
+ */
+static pm_slice_type_t
+pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
+    // first, get the right encoding object
+    const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
+    if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
+
+    // check that there is at least one character
+    if (length == 0) return PM_SLICE_TYPE_NONE;
+
+    size_t width;
+    if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
+        // valid because alphabetical
+    } else if (*source == '_') {
+        // valid because underscore
+        width = 1;
+    } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
+        // valid because multibyte
+    } else {
+        // invalid because no match
+        return PM_SLICE_TYPE_NONE;
+    }
+
+    // determine the type of the slice based on the first character
+    const uint8_t *end = source + length;
+    pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
+
+    // next, iterate through all of the bytes of the string to ensure that they
+    // are all valid identifier characters
+    source += width;
+
+    while (source < end) {
+        if ((width = encoding->alnum_char(source, end - source)) != 0) {
+            // valid because alphanumeric
+            source += width;
+        } else if (*source == '_') {
+            // valid because underscore
+            source++;
+        } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
+            // valid because multibyte
+            source += width;
+        } else {
+            // invalid because no match
+            break;
+        }
+    }
+
+    // accept a !, ? or = at the end of the slice as a method name. The bounds
+    // check matters: when the loop above consumed the whole slice, source
+    // equals end, and reading *source would touch a byte that is not part of
+    // the slice (and could wrongly reject the name if that byte happened to be
+    // one of these suffixes).
+    if (source < end && (*source == '!' || *source == '?' || *source == '=')) {
+        source++;
+        result = PM_SLICE_TYPE_METHOD_NAME;
+    }
+
+    // valid if we are at the end of the slice
+    return source == end ? result : PM_SLICE_TYPE_NONE;
+}
+
+/**
+ * Report whether the slice is a valid local variable name.
+ */
+pm_string_query_t
+pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+
+    // An unknown encoding is reported as an error rather than as "false".
+    if (type == PM_SLICE_TYPE_ERROR) return PM_STRING_QUERY_ERROR;
+
+    // Constants, method names, and unclassifiable slices are not locals.
+    return (type == PM_SLICE_TYPE_LOCAL) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE;
+}
+
+/**
+ * Report whether the slice is a valid constant name.
+ */
+pm_string_query_t
+pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+
+    // An unknown encoding is reported as an error rather than as "false".
+    if (type == PM_SLICE_TYPE_ERROR) return PM_STRING_QUERY_ERROR;
+
+    // Locals, method names, and unclassifiable slices are not constants.
+    return (type == PM_SLICE_TYPE_CONSTANT) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE;
+}
+
+/**
+ * Report whether the slice is a valid method name. Identifiers (optionally
+ * ending in `!`, `?`, or `=`) and constants qualify, as do the operator method
+ * names listed below. Numbered parameters (`_1` through `_9`) do not.
+ */
+pm_string_query_t
+pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
+    // Operator method names, grouped by their length in bytes.
+    static const uint8_t operators_1[] = { '&', '`', '!', '^', '>', '<', '-', '%', '|', '+', '/', '*', '~' };
+    static const char *const operators_2[] = { "!=", "!~", "[]", "==", "=~", ">=", ">>", "<=", "<<", "**" };
+    static const char *const operators_3[] = { "===", "<=>", "[]=" };
+
+    const pm_slice_type_t type = pm_slice_type(source, length, encoding_name);
+
+    if (type == PM_SLICE_TYPE_ERROR) {
+        return PM_STRING_QUERY_ERROR;
+    }
+
+    // all constants and all method names are valid method names
+    if (type == PM_SLICE_TYPE_CONSTANT || type == PM_SLICE_TYPE_METHOD_NAME) {
+        return PM_STRING_QUERY_TRUE;
+    }
+
+    if (type == PM_SLICE_TYPE_LOCAL) {
+        // numbered parameters are not valid method names
+        if ((length == 2) && (source[0] == '_') && (source[1] != '0') && pm_char_is_decimal_digit(source[1])) {
+            return PM_STRING_QUERY_FALSE;
+        }
+
+        return PM_STRING_QUERY_TRUE;
+    }
+
+    // Not an identifier at all, so the only candidates left are operators.
+    if (length == 1) {
+        if (memchr(operators_1, *source, sizeof(operators_1)) != NULL) {
+            return PM_STRING_QUERY_TRUE;
+        }
+    } else if (length == 2) {
+        for (size_t index = 0; index < sizeof(operators_2) / sizeof(operators_2[0]); index++) {
+            if (memcmp(source, operators_2[index], 2) == 0) return PM_STRING_QUERY_TRUE;
+        }
+    } else if (length == 3) {
+        for (size_t index = 0; index < sizeof(operators_3) / sizeof(operators_3[0]); index++) {
+            if (memcmp(source, operators_3[index], 3) == 0) return PM_STRING_QUERY_TRUE;
+        }
+    }
+
+    return PM_STRING_QUERY_FALSE;
+}
diff --git a/prism/string_query.h b/prism/string_query.h
new file mode 100644
index 0000000000..6ee1a9d9b6
--- /dev/null
+++ b/prism/string_query.h
@@ -0,0 +1,63 @@
+/**
+ * @file string_query.h
+ *
+ * Functions for querying properties of strings, such as whether they are valid
+ * local variable names, constant names, or method names.
+ */
+#ifndef PRISM_STRING_QUERY_H
+#define PRISM_STRING_QUERY_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Represents the results of a slice query. This is a three-state answer: the
+ * error value is pinned to -1, so false is 0 and true is 1.
+ */
+typedef enum {
+ /** Returned if the encoding given to a slice query was invalid. */
+ PM_STRING_QUERY_ERROR = -1,
+
+ /** Returned if the result of the slice query is false. */
+ PM_STRING_QUERY_FALSE,
+
+ /** Returned if the result of the slice query is true. */
+ PM_STRING_QUERY_TRUE
+} pm_string_query_t;
+
+/**
+ * Check that the slice is a valid local variable name.
+ *
+ * A name whose first character is uppercase in the given encoding is treated
+ * as a constant, and a name ending in `!`, `?`, or `=` is treated as a method
+ * name; both yield PM_STRING_QUERY_FALSE here, as does an empty slice.
+ *
+ * @param source The source to check.
+ * @param length The length of the source, in bytes.
+ * @param encoding_name The NUL-terminated name of the encoding of the source.
+ * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3);
+
+/**
+ * Check that the slice is a valid constant name.
+ *
+ * The slice must be an identifier whose first character is uppercase in the
+ * given encoding. A name ending in `!`, `?`, or `=` is treated as a method
+ * name and yields PM_STRING_QUERY_FALSE here, as does an empty slice.
+ *
+ * @param source The source to check.
+ * @param length The length of the source, in bytes.
+ * @param encoding_name The NUL-terminated name of the encoding of the source.
+ * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3);
+
+/**
+ * Check that the slice is a valid method name.
+ *
+ * Local variable names, constant names, and identifiers ending in `!`, `?`, or
+ * `=` are accepted, as are operator method names such as `+`, `==`, `<=>`, and
+ * `[]=`. Numbered parameters (`_1` through `_9`) are rejected.
+ *
+ * @param source The source to check.
+ * @param length The length of the source, in bytes.
+ * @param encoding_name The NUL-terminated name of the encoding of the source.
+ * @returns PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
+ * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
+ */
+PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) PRISM_NONNULL(1, 3);
+
+#endif
diff --git a/prism/stringy.c b/prism/stringy.c
new file mode 100644
index 0000000000..d6f4c4a777
--- /dev/null
+++ b/prism/stringy.c
@@ -0,0 +1,91 @@
+#include "prism/internal/stringy.h"
+
+#include "prism/internal/allocator.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Set up a shared string: a view of the bytes in [start, end) that borrows
+ * from some other buffer and therefore never frees them.
+ */
+void
+pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
+    assert(start <= end);
+
+    string->type = PM_STRING_SHARED;
+    string->source = start;
+    string->length = (size_t) (end - start);
+}
+
+/**
+ * Set up an owned string: the string takes responsibility for the given
+ * buffer, which pm_string_cleanup later releases.
+ */
+void
+pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
+    string->type = PM_STRING_OWNED;
+    string->source = source;
+    string->length = length;
+}
+
+/**
+ * Set up a constant string: a view of memory the string does not own and will
+ * never free.
+ */
+void
+pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
+    string->type = PM_STRING_CONSTANT;
+    string->source = (const uint8_t *) source;
+    string->length = length;
+}
+
+/**
+ * Order two strings, first by length and then by content. The result is 0 when
+ * the strings are equal, negative when the left string sorts before the right
+ * one, and positive when it sorts after. A shorter string always sorts before
+ * a longer one, regardless of content.
+ */
+int
+pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
+    const size_t left_length = pm_string_length(left);
+    const size_t right_length = pm_string_length(right);
+
+    // Strings of different lengths are ordered by length alone.
+    if (left_length != right_length) {
+        return (left_length < right_length) ? -1 : 1;
+    }
+
+    // Same length, so the bytes decide.
+    return memcmp(pm_string_source(left), pm_string_source(right), left_length);
+}
+
+/**
+ * Accessor for the length of the string, in bytes.
+ */
+size_t
+pm_string_length(const pm_string_t *string) {
+    const size_t length = string->length;
+    return length;
+}
+
+/**
+ * Accessor for the pointer to the first byte of the string.
+ */
+const uint8_t *
+pm_string_source(const pm_string_t *string) {
+    const uint8_t *source = string->source;
+    return source;
+}
+
+/**
+ * Release the memory held by the given string, if it holds any. Only owned
+ * strings have a buffer to free.
+ */
+void
+pm_string_cleanup(pm_string_t *string) {
+    // Constant and shared strings borrow their bytes, so there is nothing to do.
+    if (string->type != PM_STRING_OWNED) return;
+
+    xfree_sized((void *) string->source, string->length);
+}
diff --git a/prism/stringy.h b/prism/stringy.h
new file mode 100644
index 0000000000..0d64387ac3
--- /dev/null
+++ b/prism/stringy.h
@@ -0,0 +1,72 @@
+/**
+ * @file stringy.h
+ *
+ * A generic string type that can have various ownership semantics.
+ */
+#ifndef PRISM_STRINGY_H
+#define PRISM_STRINGY_H
+
+#include "prism/compiler/exported.h"
+#include "prism/compiler/nonnull.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A generic string type that can have various ownership semantics. The string
+ * is a pointer plus a byte length, tagged with how the underlying memory is
+ * owned.
+ */
+typedef struct {
+ /** A pointer to the start of the string. */
+ const uint8_t *source;
+
+ /** The length of the string in bytes of memory. */
+ size_t length;
+
+ /** The type of the string. This field determines how the string should be freed. */
+ enum {
+ /** This string is a constant string, and should not be freed. */
+ PM_STRING_CONSTANT,
+
+ /** This is a slice of another string, and should not be freed. */
+ PM_STRING_SHARED,
+
+ /**
+ * This string owns its memory, and should be freed internally (this is
+ * the only type for which pm_string_cleanup releases anything).
+ */
+ PM_STRING_OWNED
+ } type;
+} pm_string_t;
+
+/**
+ * Initialize a constant string that doesn't own its memory source.
+ *
+ * The string only stores the pointer and length it is given; the bytes are not
+ * copied, and they are never freed through this string.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string. NOTE(review): only the first
+ * argument is annotated PRISM_NONNULL, so a NULL source is presumably
+ * accepted -- confirm.
+ * @param length The length of the string, in bytes.
+ */
+PRISM_EXPORTED_FUNCTION void pm_string_constant_init(pm_string_t *string, const char *source, size_t length) PRISM_NONNULL(1);
+
+/**
+ * Initialize an owned string that is responsible for freeing allocated memory.
+ *
+ * The string stores the given pointer rather than copying the bytes. The
+ * buffer is later released by pm_string_cleanup, which passes both the pointer
+ * and the length to the allocator's sized free.
+ *
+ * @param string The string to initialize.
+ * @param source The source of the string.
+ * @param length The length of the string, in bytes.
+ */
+PRISM_EXPORTED_FUNCTION void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) PRISM_NONNULL(1, 2);
+
+/**
+ * Returns the length associated with the string.
+ *
+ * @param string The string to get the length of.
+ * @returns The length of the string, in bytes.
+ */
+PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string) PRISM_NONNULL(1);
+
+/**
+ * Returns the start pointer associated with the string.
+ *
+ * The pointer is returned together with no terminator guarantee: use
+ * pm_string_length to learn how many bytes may be read from it.
+ *
+ * @param string The string to get the start pointer of.
+ * @returns The start pointer of the string.
+ */
+PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string) PRISM_NONNULL(1);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/strncasecmp.c
index 3f58421554..a373cad6d7 100644
--- a/prism/util/pm_strncasecmp.c
+++ b/prism/strncasecmp.c
@@ -1,11 +1,12 @@
-#include "prism/util/pm_strncasecmp.h"
+#include "prism/internal/strncasecmp.h"
+
+#include "prism/compiler/inline.h"
/**
* A locale-insensitive version of `tolower(3)`
*/
-static inline int
-pm_tolower(int c)
-{
+static PRISM_INLINE int
+pm_tolower(int c) {
if ('A' <= c && c <= 'Z') {
return c | 0x20;
}
diff --git a/prism/strpbrk.c b/prism/strpbrk.c
new file mode 100644
index 0000000000..383707eb72
--- /dev/null
+++ b/prism/strpbrk.c
@@ -0,0 +1,439 @@
+#include "prism/internal/strpbrk.h"
+
+#include "prism/compiler/accel.h"
+#include "prism/compiler/inline.h"
+#include "prism/compiler/unused.h"
+
+#include "prism/internal/bit.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/parser.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+
+/**
+ * Record a PM_ERR_INVALID_MULTIBYTE_CHARACTER diagnostic on the parser's error
+ * list.
+ *
+ * @param parser The parser whose error list receives the diagnostic.
+ * @param start The offset of the invalid sequence, used to index parser->start.
+ * @param length The number of bytes that the diagnostic covers.
+ */
+static PRISM_INLINE void
+pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    // The byte found at the start offset is handed to the formatter as the
+    // argument for the diagnostic message.
+    pm_diagnostic_list_append_format(
+        &parser->metadata_arena,
+        &parser->error_list,
+        start,
+        length,
+        PM_ERR_INVALID_MULTIBYTE_CHARACTER,
+        parser->start[start]
+    );
+}
+
+/**
+ * Lock the parser's explicit encoding to its current encoding.
+ *
+ * If an explicit encoding was already recorded and it differs from the current
+ * encoding, it is expected to be UTF-8 (set by an earlier Unicode escape
+ * sequence); in that case a PM_ERR_MIXED_ENCODING error is reported at the
+ * given location.
+ *
+ * @param parser The parser whose explicit encoding is being set.
+ * @param start The start offset used for the diagnostic, if one is reported.
+ * @param length The length used for the diagnostic, if one is reported.
+ */
+static PRISM_INLINE void
+pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, uint32_t start, uint32_t length) {
+    // There is nothing to check when no explicit encoding has been recorded
+    // yet, or when we are already locked to the current encoding.
+    if (parser->explicit_encoding != NULL && parser->explicit_encoding != parser->encoding) {
+        if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+            // A Unicode escape sequence was found earlier, and it conflicts
+            // with the encoding we are about to lock to.
+            pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->error_list, start, length, PM_ERR_MIXED_ENCODING, parser->encoding->name);
+        } else {
+            // No other explicit encoding should be possible here.
+            assert(false && "unreachable");
+        }
+    }
+
+    parser->explicit_encoding = parser->encoding;
+}
+
+/**
+ * Scan forward through ASCII bytes looking for a byte that is in the given
+ * charset. Returns true if a match was found, storing its offset in *index.
+ * Returns false if no match was found, storing the number of ASCII bytes
+ * consumed in *index (so the caller can skip past them).
+ *
+ * All charset characters must be ASCII (< 0x80). The scanner stops at non-ASCII
+ * bytes, returning control to the caller's encoding-aware loop.
+ *
+ * One of up to three optimized implementations is selected at compile time,
+ * with a no-op fallback for unsupported platforms:
+ * 1. NEON — processes 16 bytes per iteration on aarch64.
+ * 2. SSSE3 — processes 16 bytes per iteration on x86-64.
+ * 3. SWAR — little-endian fallback, processes 8 bytes per iteration.
+ */
+
+#if defined(PRISM_HAS_NEON) || defined(PRISM_HAS_SSSE3) || defined(PRISM_HAS_SWAR)
+
+/**
+ * Refresh the parser's cached strpbrk lookup tables when the charset differs
+ * from the one they were last built for. Repeated calls with the same
+ * breakpoints (the common case during string/regex/list lexing) return
+ * immediately without rebuilding anything.
+ *
+ * Three structures are rebuilt:
+ * - low_lut/high_lut: nibble-indexed lookup tables for SIMD matching
+ * - table: a bitmap indexed by byte value for scalar matching
+ *
+ * @param parser The parser that owns the cache.
+ * @param charset The NUL-terminated charset. It is compared against the
+ *     cached key over the full size of the key buffer, so it must be readable
+ *     (and NUL-padded) for that many bytes.
+ */
+static PRISM_INLINE void
+pm_strpbrk_cache_update(pm_parser_t *parser, const uint8_t *charset) {
+    const size_t key_size = sizeof(parser->strpbrk_cache.charset);
+
+    // The key is the whole NUL-padded charset buffer, so one fixed-size
+    // comparison covers both content and length.
+    if (memcmp(parser->strpbrk_cache.charset, charset, key_size) == 0) {
+        return;
+    }
+
+    memset(parser->strpbrk_cache.low_lut, 0, sizeof(parser->strpbrk_cache.low_lut));
+    memset(parser->strpbrk_cache.high_lut, 0, sizeof(parser->strpbrk_cache.high_lut));
+    memset(parser->strpbrk_cache.table, 0, sizeof(parser->strpbrk_cache.table));
+
+    // NUL is always a breakpoint: the slow paths use strchr, which matches the
+    // terminator of the charset string. Mirroring that here lets the fast
+    // scanner stop on NUL itself instead of deferring to the slow path.
+    parser->strpbrk_cache.low_lut[0x00] |= (uint8_t) (1 << 0);
+    parser->strpbrk_cache.high_lut[0x00] = (uint8_t) (1 << 0);
+    parser->strpbrk_cache.table[0] |= (uint64_t) 1;
+
+    size_t charset_len = 0;
+    while (charset[charset_len] != '\0') {
+        const uint8_t byte = charset[charset_len];
+        const uint8_t low_nibble = (uint8_t) (byte & 0x0F);
+        const uint8_t high_nibble = (uint8_t) (byte >> 4);
+
+        // A byte matches in the SIMD scanners when the entries selected by
+        // its low and high nibbles share a bit; that bit is the high nibble.
+        parser->strpbrk_cache.low_lut[low_nibble] |= (uint8_t) (1 << high_nibble);
+        parser->strpbrk_cache.high_lut[high_nibble] = (uint8_t) (1 << high_nibble);
+        parser->strpbrk_cache.table[byte >> 6] |= (uint64_t) 1 << (byte & 0x3F);
+
+        charset_len++;
+    }
+
+    // Remember the charset as the new key, NUL-padded out to the key size.
+    memcpy(parser->strpbrk_cache.charset, charset, charset_len + 1);
+    memset(parser->strpbrk_cache.charset + charset_len + 1, 0, key_size - charset_len - 1);
+}
+
+#endif
+
+#if defined(PRISM_HAS_NEON)
+#include <arm_neon.h>
+
+/**
+ * NEON implementation of the ASCII fast-path scanner.
+ *
+ * Refreshes the parser's cached lookup tables for the charset, then examines
+ * 16 bytes per iteration, classifying each byte through its low and high
+ * nibbles. The vector loop stops at the first chunk that contains a byte with
+ * the high bit set (or when fewer than 16 bytes remain); the scalar loop then
+ * consumes the following ASCII bytes one at a time.
+ *
+ * @param parser The parser that owns the strpbrk lookup-table cache.
+ * @param source The bytes to scan.
+ * @param maximum The maximum number of bytes to scan.
+ * @param charset The NUL-terminated set of breakpoint bytes.
+ * @param index Receives the offset of the match when true is returned,
+ *     otherwise the number of bytes that were consumed.
+ * @returns True if a byte present in the cached tables was found.
+ */
+static PRISM_INLINE bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+ pm_strpbrk_cache_update(parser, charset);
+
+ uint8x16_t low_lut = vld1q_u8(parser->strpbrk_cache.low_lut);
+ uint8x16_t high_lut = vld1q_u8(parser->strpbrk_cache.high_lut);
+ uint8x16_t mask_0f = vdupq_n_u8(0x0F);
+ uint8x16_t mask_80 = vdupq_n_u8(0x80);
+
+ size_t idx = 0;
+
+ while (idx + 16 <= maximum) {
+ uint8x16_t v = vld1q_u8(source + idx);
+
+ // If any byte has the high bit set, we have non-ASCII data.
+ // Return to let the caller's encoding-aware loop handle it.
+ if (vmaxvq_u8(vandq_u8(v, mask_80)) != 0) break;
+
+ // Look up one class byte per lane from each table; a lane matches when
+ // the two class bytes have a bit in common.
+ uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
+ uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
+ uint8x16_t matched = vtstq_u8(lo_class, hi_class);
+
+ if (vmaxvq_u8(matched) == 0) {
+ idx += 16;
+ continue;
+ }
+
+ // Find the position of the first matching byte.
+ // Each lane is 8 bits wide, so the trailing zero bit count divided by 8
+ // is the lane number within the 64-bit half.
+ // NOTE(review): this presumes lane 0 is the least significant byte of
+ // the 64-bit half (little-endian lane order) — confirm for the target.
+ uint64_t lo64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 0);
+ if (lo64 != 0) {
+ *index = idx + pm_ctzll(lo64) / 8;
+ return true;
+ }
+ uint64_t hi64 = vgetq_lane_u64(vreinterpretq_u64_u8(matched), 1);
+ *index = idx + 8 + pm_ctzll(hi64) / 8;
+ return true;
+ }
+
+ // Scalar tail for remaining < 16 ASCII bytes.
+ // This also runs after the vector loop bails on a non-ASCII chunk, in
+ // which case it consumes the ASCII bytes that precede the non-ASCII byte.
+ while (idx < maximum && source[idx] < 0x80) {
+ uint8_t byte = source[idx];
+ if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+ *index = idx;
+ return true;
+ }
+ idx++;
+ }
+
+ *index = idx;
+ return false;
+}
+
+#elif defined(PRISM_HAS_SSSE3)
+#include <tmmintrin.h>
+
+/**
+ * SSSE3 implementation of the ASCII fast-path scanner.
+ *
+ * Refreshes the parser's cached lookup tables for the charset, then examines
+ * 16 bytes per iteration, classifying each byte through its low and high
+ * nibbles. The vector loop stops at the first chunk that contains a byte with
+ * the high bit set (or when fewer than 16 bytes remain); the scalar loop then
+ * consumes the following ASCII bytes one at a time.
+ *
+ * @param parser The parser that owns the strpbrk lookup-table cache.
+ * @param source The bytes to scan.
+ * @param maximum The maximum number of bytes to scan.
+ * @param charset The NUL-terminated set of breakpoint bytes.
+ * @param index Receives the offset of the match when true is returned,
+ *     otherwise the number of bytes that were consumed.
+ * @returns True if a byte present in the cached tables was found.
+ */
+static PRISM_INLINE bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+ pm_strpbrk_cache_update(parser, charset);
+
+ __m128i low_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.low_lut);
+ __m128i high_lut = _mm_loadu_si128((const __m128i *) parser->strpbrk_cache.high_lut);
+ __m128i mask_0f = _mm_set1_epi8(0x0F);
+
+ size_t idx = 0;
+
+ while (idx + 16 <= maximum) {
+ __m128i v = _mm_loadu_si128((const __m128i *) (source + idx));
+
+ // If any byte has the high bit set, stop.
+ // (movemask gathers the most significant bit of each of the 16 bytes.)
+ if (_mm_movemask_epi8(v) != 0) break;
+
+ // Nibble-based classification using pshufb (SSSE3), same as NEON
+ // vqtbl1q_u8. A byte matches iff (low_lut[lo_nib] & high_lut[hi_nib]) != 0.
+ // The shift operates on 16-bit lanes, so bits from the neighbouring byte
+ // leak into the upper nibble; masking with 0x0F removes them.
+ __m128i lo_class = _mm_shuffle_epi8(low_lut, _mm_and_si128(v, mask_0f));
+ __m128i hi_class = _mm_shuffle_epi8(high_lut, _mm_and_si128(_mm_srli_epi16(v, 4), mask_0f));
+ __m128i matched = _mm_and_si128(lo_class, hi_class);
+
+ // Check if any byte matched.
+ // The mask has one bit per byte, set where that byte of matched is zero.
+ int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(matched, _mm_setzero_si128()));
+
+ if (mask == 0xFFFF) {
+ // All bytes were zero — no match in this chunk.
+ idx += 16;
+ continue;
+ }
+
+ // Find the first matching byte (first non-zero in matched).
+ // Inverting the mask turns it into one bit per matching byte, so the
+ // trailing zero count is the offset of the first match in the chunk.
+ *index = idx + pm_ctzll((uint64_t) (~mask & 0xFFFF));
+ return true;
+ }
+
+ // Scalar tail.
+ // This also runs after the vector loop bails on a non-ASCII chunk, in
+ // which case it consumes the ASCII bytes that precede the non-ASCII byte.
+ while (idx < maximum && source[idx] < 0x80) {
+ uint8_t byte = source[idx];
+ if (parser->strpbrk_cache.table[byte >> 6] & ((uint64_t) 1 << (byte & 0x3F))) {
+ *index = idx;
+ return true;
+ }
+ idx++;
+ }
+
+ *index = idx;
+ return false;
+}
+
+#elif defined(PRISM_HAS_SWAR)
+
+/**
+ * SWAR implementation of the ASCII fast-path scanner.
+ *
+ * Refreshes the parser's cached lookup tables for the charset, then walks the
+ * source 8 bytes at a time. A single 64-bit test rejects any chunk containing
+ * a byte with the high bit set; otherwise each byte of the chunk is checked
+ * against the cached bitmap. A byte-at-a-time loop handles whatever ASCII
+ * bytes follow.
+ *
+ * @param parser The parser that owns the strpbrk lookup-table cache.
+ * @param source The bytes to scan.
+ * @param maximum The maximum number of bytes to scan.
+ * @param charset The NUL-terminated set of breakpoint bytes.
+ * @param index Receives the offset of the match when true is returned,
+ *     otherwise the number of bytes that were consumed.
+ * @returns True if a byte present in the cached bitmap was found.
+ */
+static PRISM_INLINE bool
+scan_strpbrk_ascii(pm_parser_t *parser, const uint8_t *source, size_t maximum, const uint8_t *charset, size_t *index) {
+    pm_strpbrk_cache_update(parser, charset);
+
+    static const uint64_t highs = 0x8080808080808080ULL;
+    size_t idx = 0;
+
+    for (; idx + 8 <= maximum; idx += 8) {
+        // Copy the chunk into a word rather than casting the pointer, so the
+        // load is valid regardless of the alignment of the source.
+        uint64_t word;
+        memcpy(&word, source + idx, 8);
+
+        // Any high bit in the word means the chunk holds a non-ASCII byte.
+        if ((word & highs) != 0) break;
+
+        // The chunk is pure ASCII: test each of its bytes against the bitmap.
+        for (size_t j = 0; j < 8; j++) {
+            const uint8_t byte = source[idx + j];
+            const uint64_t bit = (uint64_t) 1 << (byte & 0x3F);
+
+            if ((parser->strpbrk_cache.table[byte >> 6] & bit) != 0) {
+                *index = idx + j;
+                return true;
+            }
+        }
+    }
+
+    // Byte-at-a-time loop over the ASCII bytes that remain.
+    for (; idx < maximum && source[idx] < 0x80; idx++) {
+        const uint8_t byte = source[idx];
+        const uint64_t bit = (uint64_t) 1 << (byte & 0x3F);
+
+        if ((parser->strpbrk_cache.table[byte >> 6] & bit) != 0) {
+            *index = idx;
+            return true;
+        }
+    }
+
+    *index = idx;
+    return false;
+}
+
+#else
+
+/**
+ * No-op fallback used when none of the accelerated scanners is available. It
+ * consumes nothing: *index is set to 0 and false is returned, so the caller's
+ * encoding-aware loop starts from the beginning of the source.
+ */
+static PRISM_INLINE bool
+scan_strpbrk_ascii(PRISM_UNUSED pm_parser_t *parser, PRISM_UNUSED const uint8_t *source, PRISM_UNUSED size_t maximum, PRISM_UNUSED const uint8_t *charset, size_t *index) {
+ *index = 0;
+ return false;
+}
+
+#endif
+
+/**
+ * This is the default path, taken while the parser's encoding has not been
+ * changed. Characters are stepped over using the UTF-8 character width.
+ *
+ * @param parser The parser, used to report invalid multibyte characters.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param index The offset into source at which to start searching.
+ * @param maximum The number of bytes of source that may be searched.
+ * @param validate Whether invalid multibyte characters should be reported.
+ * @returns A pointer to the first matching byte, or NULL if there is none.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    while (index < maximum) {
+        const uint8_t byte = source[index];
+
+        if (strchr((const char *) charset, byte) != NULL) {
+            return source + index;
+        }
+
+        // ASCII bytes are always exactly one character wide.
+        if (byte < 0x80) {
+            index++;
+            continue;
+        }
+
+        const size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
+
+        if (width > 0) {
+            index += width;
+            continue;
+        }
+
+        // The character is invalid. Without validation, just step over the
+        // offending byte.
+        if (!validate) {
+            index++;
+            continue;
+        }
+
+        // With validation, walk forward until the next valid character so that
+        // a run of invalid bytes is reported as one error instead of many.
+        const size_t start = index;
+
+        do {
+            index++;
+        } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+        pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start));
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the path when the encoding is ASCII-8BIT. The search advances one
+ * byte at a time.
+ *
+ * @param parser The parser, whose explicit encoding is locked when a byte
+ *     with the high bit set is found and validate is true.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param index The offset into source at which to start searching.
+ * @param maximum The number of bytes of source that may be searched.
+ * @param validate Whether the explicit encoding should be checked and locked.
+ * @returns A pointer to the first matching byte, or NULL if there is none.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (validate && source[index] >= 0x80) {
+            // Anchor any mixed-encoding diagnostic on the offending byte
+            // itself (source + index) rather than on the start of the region
+            // being searched, so that the one-byte length covers that byte.
+            pm_strpbrk_explicit_encoding_set(parser, (uint32_t) ((source + index) - parser->start), 1);
+        }
+
+        index++;
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the slow path that does care about the encoding. Non-ASCII
+ * characters are stepped over using the width reported by the parser's
+ * encoding.
+ *
+ * @param parser The parser, used for its encoding and to report diagnostics.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param index The offset into source at which to start searching.
+ * @param maximum The number of bytes of source that may be searched.
+ * @param validate Whether to lock the explicit encoding and report invalid
+ *     multibyte characters.
+ * @returns A pointer to the first matching byte, or NULL if there is none.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            // Anchor any mixed-encoding diagnostic on the character that was
+            // just measured (source + index) rather than on the start of the
+            // region being searched, so that the width covers that character.
+            if (validate) pm_strpbrk_explicit_encoding_set(parser, (uint32_t) ((source + index) - parser->start), (uint32_t) width);
+
+            if (width > 0) {
+                index += width;
+            } else if (!validate) {
+                index++;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start));
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * This is the fast path that does not care about the encoding because we know
+ * the encoding only supports single-byte characters. Unless validate is set,
+ * every byte is simply stepped over; with validate set, bytes with the high
+ * bit set are checked against the parser's encoding.
+ *
+ * @param parser The parser, used for its encoding and to report diagnostics.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param index The offset into source at which to start searching.
+ * @param maximum The number of bytes of source that may be searched.
+ * @param validate Whether to lock the explicit encoding and report invalid
+ *     characters.
+ * @returns A pointer to the first matching byte, or NULL if there is none.
+ */
+static PRISM_INLINE const uint8_t *
+pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t index, size_t maximum, bool validate) {
+    const pm_encoding_t *encoding = parser->encoding;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        if (source[index] < 0x80 || !validate) {
+            index++;
+        } else {
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
+
+            // Anchor any mixed-encoding diagnostic on the character that was
+            // just measured (source + index) rather than on the start of the
+            // region being searched, so that the width covers that character.
+            pm_strpbrk_explicit_encoding_set(parser, (uint32_t) ((source + index) - parser->start), (uint32_t) width);
+
+            if (width > 0) {
+                index += width;
+            } else {
+                // At this point we know we have an invalid multibyte character.
+                // We'll walk forward as far as we can until we find the next
+                // valid character so that we don't spam the user with a ton of
+                // the same kind of error.
+                const size_t start = index;
+
+                do {
+                    index++;
+                } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
+
+                pm_strpbrk_invalid_multibyte_character(parser, (uint32_t) ((source + start) - parser->start), (uint32_t) (index - start));
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * Here we have rolled our own version of strpbrk, for three reasons.
+ *
+ * First, the standard library strpbrk has undefined behavior when the source
+ * string is not null-terminated. pm_parse does not require a null-terminated
+ * source (which is desirable because it means the extension can call pm_parse
+ * with the result of a call to mmap), so the search has to be bounded by an
+ * explicit length instead.
+ *
+ * Second, Ruby allows null bytes within strings, comments, regular
+ * expressions, etc., so a null byte cannot be treated as the end of the
+ * source. Note that every search path tests bytes with strchr, which also
+ * matches the terminator of the charset string, so a null byte in the source
+ * is itself returned as a match and the search is resumed by the caller.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ *
+ * @param parser The parser, used for its encoding and to report diagnostics.
+ * @param source The bytes to search.
+ * @param charset The NUL-terminated set of bytes to search for.
+ * @param length The number of bytes of source that may be searched.
+ * @param validate Whether the encoding should be validated while searching.
+ * @returns A pointer to the first matching byte, or NULL if there is none or
+ *     if length is not positive.
+ */
+const uint8_t *
+pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
+    if (length <= 0) {
+        return NULL;
+    }
+
+    const size_t maximum = (size_t) length;
+    size_t index = 0;
+
+    // Let the ASCII scanner consume as much as it can. When it finds nothing,
+    // index holds the number of bytes it consumed and the encoding-aware
+    // search below picks up from there.
+    if (scan_strpbrk_ascii(parser, source, maximum, charset, &index)) {
+        return source + index;
+    }
+
+    if (!parser->encoding_changed) {
+        return pm_strpbrk_utf8(parser, source, charset, index, maximum, validate);
+    }
+
+    if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
+        return pm_strpbrk_ascii_8bit(parser, source, charset, index, maximum, validate);
+    }
+
+    if (parser->encoding->multibyte) {
+        return pm_strpbrk_multi_byte(parser, source, charset, index, maximum, validate);
+    }
+
+    return pm_strpbrk_single_byte(parser, source, charset, index, maximum, validate);
+}
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 23af8886a7..41d7165930 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -1,5 +1,9 @@
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
#include "prism/extension.h"
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
extern VALUE rb_cPrism;
extern VALUE rb_cPrismNode;
@@ -12,25 +16,20 @@ static VALUE rb_cPrism<%= node.name %>;
<%- end -%>
static VALUE
-pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end, VALUE source, bool freeze) {
+pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool freeze) {
if (freeze) {
- VALUE location_argv[] = {
- source,
- LONG2FIX(start - parser->start),
- LONG2FIX(end - start)
- };
-
+ VALUE location_argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation));
} else {
- uint64_t value = ((((uint64_t) (start - parser->start)) << 32) | ((uint32_t) (end - start)));
+ uint64_t value = ((((uint64_t) start) << 32) | ((uint64_t) length));
return ULL2NUM(value);
}
}
VALUE
pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
- ID type = rb_intern(pm_token_type_name(token->type));
- VALUE location = pm_location_new(parser, token->start, token->end, source, freeze);
+ ID type = rb_intern(pm_token_type(token->type));
+ VALUE location = pm_location_new((uint32_t) (token->start - pm_parser_start(parser)), (uint32_t) (token->end - token->start), source, freeze);
VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding);
if (freeze) rb_obj_freeze(slice);
@@ -79,19 +78,25 @@ pm_integer_new(const pm_integer_t *integer) {
// Create a Prism::Source object from the given parser, after pm_parse() was called.
VALUE
pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
- VALUE source_string = rb_enc_str_new((const char *) parser->start, parser->end - parser->start, encoding);
+ const uint8_t *start = pm_parser_start(parser);
+ VALUE source_string = rb_enc_str_new((const char *) start, pm_parser_end(parser) - start, encoding);
- VALUE offsets = rb_ary_new_capa(parser->newline_list.size);
- for (size_t index = 0; index < parser->newline_list.size; index++) {
- rb_ary_push(offsets, ULONG2NUM(parser->newline_list.offsets[index]));
- }
+ const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
+ VALUE offsets;
if (freeze) {
+ offsets = rb_ary_new_capa(line_offsets->size);
+ for (size_t index = 0; index < line_offsets->size; index++) {
+ rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index]));
+ }
+
rb_obj_freeze(source_string);
rb_obj_freeze(offsets);
+ } else {
+ offsets = rb_str_new((const char *) line_offsets->offsets, line_offsets->size * sizeof(uint32_t));
}
- VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(parser->start_line), offsets);
+ VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets);
if (freeze) rb_obj_freeze(source);
return source;
@@ -104,8 +109,8 @@ typedef struct pm_node_stack_node {
} pm_node_stack_node_t;
static void
-pm_node_stack_push(pm_node_stack_node_t **stack, const pm_node_t *visit) {
- pm_node_stack_node_t *node = xmalloc(sizeof(pm_node_stack_node_t));
+pm_node_stack_push(pm_arena_t *arena, pm_node_stack_node_t **stack, const pm_node_t *visit) {
+ pm_node_stack_node_t *node = (pm_node_stack_node_t *) pm_arena_alloc(arena, sizeof(pm_node_stack_node_t), PRISM_ALIGNOF(pm_node_stack_node_t));
node->prev = *stack;
node->visit = visit;
node->visited = false;
@@ -118,32 +123,40 @@ pm_node_stack_pop(pm_node_stack_node_t **stack) {
const pm_node_t *visit = current->visit;
*stack = current->prev;
- xfree(current);
return visit;
}
-VALUE
-pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
- VALUE constants = rb_ary_new_capa(parser->constant_pool.size);
-
- for (uint32_t index = 0; index < parser->constant_pool.size; index++) {
- pm_constant_t *constant = &parser->constant_pool.constants[index];
- int state = 0;
+typedef struct {
+ VALUE constants;
+ rb_encoding *encoding;
+} pm_ast_constants_each_data_t;
- VALUE string = rb_enc_str_new((const char *) constant->start, constant->length, encoding);
- VALUE value = rb_protect(rb_str_intern, string, &state);
+static void
+pm_ast_constants_each(const pm_constant_t *constant, void *data) {
+ pm_ast_constants_each_data_t *constants_data = (pm_ast_constants_each_data_t *) data;
+ int state = 0;
- if (state != 0) {
- value = ID2SYM(rb_intern_const("?"));
- rb_set_errinfo(Qnil);
- }
+ VALUE string = rb_enc_str_new((const char *) pm_constant_start(constant), pm_constant_length(constant), constants_data->encoding);
+ VALUE value = rb_protect(rb_str_intern, string, &state);
- rb_ary_push(constants, value);
+ if (state != 0) {
+ value = ID2SYM(rb_intern_const("?"));
+ rb_set_errinfo(Qnil);
}
+ rb_ary_push(constants_data->constants, value);
+}
+
+VALUE
+pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
+ VALUE constants = rb_ary_new_capa(pm_parser_constants_size(parser));
+ pm_ast_constants_each_data_t constants_data = { .constants = constants, .encoding = encoding };
+ pm_parser_constants_each(parser, pm_ast_constants_each, &constants_data);
+
+ pm_arena_t *node_arena = pm_arena_new();
pm_node_stack_node_t *node_stack = NULL;
- pm_node_stack_push(&node_stack, node);
+ pm_node_stack_push(node_arena, &node_stack, node);
VALUE value_stack = rb_ary_new();
while (node_stack != NULL) {
@@ -166,10 +179,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
- pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>);
+ pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>);
<%- when Prism::Template::NodeListField -%>
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
- pm_node_stack_push(&node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
+ pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
}
<%- end -%>
<%- end -%>
@@ -200,7 +213,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
argv[1] = ULONG2NUM(node->node_id);
// location
- argv[2] = pm_location_new(parser, node->location.start, node->location.end, source, freeze);
+ argv[2] = pm_location_new(node->location.start, node->location.length, source, freeze);
// flags
argv[3] = ULONG2NUM(node->flags);
@@ -237,10 +250,10 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
if (freeze) rb_obj_freeze(argv[<%= index %>]);
<%- when Prism::Template::LocationField -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
- argv[<%= index %>] = pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+ argv[<%= index %>] = pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
<%- when Prism::Template::OptionalLocationField -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
- argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
+ argv[<%= index %>] = cast-><%= field.name %>.length == 0 ? Qnil : pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
<%- when Prism::Template::UInt8Field -%>
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
@@ -271,6 +284,7 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
}
}
+ pm_arena_free(node_arena);
return rb_ary_pop(value_stack);
}
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index 751c0b43c2..3b3be25e76 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -2,16 +2,20 @@
* @file ast.h
*
* The abstract syntax tree.
+ *
+ * --
*/
#ifndef PRISM_AST_H
#define PRISM_AST_H
-#include "prism/defines.h"
-#include "prism/util/pm_constant_pool.h"
-#include "prism/util/pm_integer.h"
-#include "prism/util/pm_string.h"
+#include "prism/compiler/align.h"
+#include "prism/compiler/exported.h"
+
+#include "prism/arena.h"
+#include "prism/constant_pool.h"
+#include "prism/integer.h"
+#include "prism/stringy.h"
-#include <assert.h>
#include <stddef.h>
#include <stdint.h>
@@ -20,7 +24,7 @@
*/
typedef enum pm_token_type {
<%- tokens.each do |token| -%>
- /** <%= token.comment %> */
+ /** <%= Prism::Template::Doxygen.verbatim(token.comment) %> */
PM_TOKEN_<%= token.name %><%= " = #{token.value}" if token.value %>,
<%- end -%>
@@ -44,15 +48,28 @@ typedef struct {
} pm_token_t;
/**
- * This represents a range of bytes in the source string to which a node or
- * token corresponds.
+ * Returns a string representation of the given token type.
+ *
+ * @param token_type The type of the token to get the string representation of.
+ * @returns A string representation of the given token type. This is meant for
+ * debugging purposes and is not guaranteed to be stable across versions.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_token_type(pm_token_type_t token_type);
+
+/**
+ * This struct represents a slice in the source code, defined by an offset and
+ * a length. Note that we have confirmation that we can represent all locations
+ * within Ruby source files using 32-bit integers per:
+ *
+ * https://bugs.ruby-lang.org/issues/20488#note-1
+ *
*/
typedef struct {
- /** A pointer to the start location of the range in the source. */
- const uint8_t *start;
+ /** The offset of the location from the start of the source. */
+ uint32_t start;
- /** A pointer to the end location of the range in the source. */
- const uint8_t *end;
+ /** The length of the location. */
+ uint32_t length;
} pm_location_t;
struct pm_node;
@@ -104,29 +121,13 @@ static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
/**
- * Cast the type to an enum to allow the compiler to provide exhaustiveness
- * checking.
- */
-#define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
-
-/**
- * Return true if the type of the given node matches the given type.
- */
-#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
-
-/**
- * Return true if the given flag is set on the given node.
- */
-#define PM_NODE_FLAG_P(node, flag) ((((pm_node_t *)(node))->flags & (flag)) != 0)
-
-/**
* This is the base structure that represents a node in the syntax tree. It is
* embedded into every node type.
*/
typedef struct pm_node {
/**
* This represents the type of the node. It somewhat maps to the nodes that
- * existed in the original grammar and ripper, but it's not a 1:1 mapping.
+ * existed in the original grammar and ripper, but it is not a 1:1 mapping.
*/
pm_node_type_t type;
@@ -143,11 +144,46 @@ typedef struct pm_node {
uint32_t node_id;
/**
- * This is the location of the node in the source. It's a range of bytes
+ * This is the location of the node in the source. It is a range of bytes
* containing a start and an end.
*/
pm_location_t location;
} pm_node_t;
+
+/**
+ * Cast the given node to the base pm_node_t type.
+ */
+#define PM_NODE_UPCAST(node_) ((pm_node_t *) (node_))
+
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node_) ((enum pm_node_type) (node_)->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
+#define PM_NODE_TYPE_P(node_, type_) (PM_NODE_TYPE(node_) == (type_))
+
+/**
+ * Return the flags associated with the given node.
+ */
+#define PM_NODE_FLAGS(node_) (PM_NODE_UPCAST(node_)->flags)
+
+/**
+ * Return true if the given flag is set on the given node.
+ */
+#define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0)
+
+/**
+ * The alignment required for a child node within a parent node.
+ */
+#ifdef _MSC_VER
+#define PM_NODE_ALIGNAS __declspec(align(8))
+#else
+#define PM_NODE_ALIGNAS PRISM_ALIGNAS(PRISM_ALIGNOF(void *))
+#endif
<%- nodes.each do |node| -%>
/**
@@ -170,7 +206,6 @@ typedef struct pm_node {
typedef struct pm_<%= node.human %> {
/** The embedded base node. */
pm_node_t base;
-
<%- node.fields.each do |field| -%>
/**
@@ -183,7 +218,7 @@ typedef struct pm_<%= node.human %> {
<%- end -%>
*/
<%= case field
- when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
+ when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "PM_NODE_ALIGNAS struct #{field.c_type} *#{field.name}"
when Prism::Template::NodeListField then "struct pm_node_list #{field.name}"
when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}"
when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}"
@@ -210,8 +245,27 @@ typedef enum pm_<%= flag.human %> {
/** <%= value.comment %> */
PM_<%= flag.human.upcase %>_<%= value.name %> = <%= 1 << (index + Prism::Template::COMMON_FLAGS_COUNT) %>,
<%- end -%>
+
+ PM_<%= flag.human.upcase %>_LAST,
} pm_<%= flag.human %>_t;
<%- end -%>
+<%- nodes.each do |node| -%>
+
+<%- params = node.fields.map(&:c_param) -%>
+/**
+ * Allocate and initialize a new <%= node.name %> node.
+ *
+ * @param arena The arena to allocate from.
+ * @param node_id The unique identifier for this node.
+ * @param flags The flags for this node.
+ * @param location The location of this node in the source.
+<%- node.fields.each do |field| -%>
+ * @param <%= field.name %> <%= field.comment ? Prism::Template::Doxygen.verbatim(field.comment.lines.first.strip) : "The #{field.name} field." %>
+<%- end -%>
+ * @returns The newly allocated and initialized node.
+ */
+PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>);
+<%- end -%>
/**
* When we're serializing to Java, we want to skip serializing the location
diff --git a/prism/templates/include/prism/diagnostic.h.erb b/prism/templates/include/prism/diagnostic.h.erb
deleted file mode 100644
index 07bbc8fae7..0000000000
--- a/prism/templates/include/prism/diagnostic.h.erb
+++ /dev/null
@@ -1,130 +0,0 @@
-/**
- * @file diagnostic.h
- *
- * A list of diagnostics generated during parsing.
- */
-#ifndef PRISM_DIAGNOSTIC_H
-#define PRISM_DIAGNOSTIC_H
-
-#include "prism/ast.h"
-#include "prism/defines.h"
-#include "prism/util/pm_list.h"
-
-#include <stdbool.h>
-#include <stdlib.h>
-#include <assert.h>
-
-/**
- * The diagnostic IDs of all of the diagnostics, used to communicate the types
- * of errors between the parser and the user.
- */
-typedef enum {
- // These are the error diagnostics.
- <%- errors.each do |error| -%>
- PM_ERR_<%= error.name %>,
- <%- end -%>
-
- // These are the warning diagnostics.
- <%- warnings.each do |warning| -%>
- PM_WARN_<%= warning.name %>,
- <%- end -%>
-} pm_diagnostic_id_t;
-
-/**
- * This struct represents a diagnostic generated during parsing.
- *
- * @extends pm_list_node_t
- */
-typedef struct {
- /** The embedded base node. */
- pm_list_node_t node;
-
- /** The location of the diagnostic in the source. */
- pm_location_t location;
-
- /** The ID of the diagnostic. */
- pm_diagnostic_id_t diag_id;
-
- /** The message associated with the diagnostic. */
- const char *message;
-
- /**
- * Whether or not the memory related to the message of this diagnostic is
- * owned by this diagnostic. If it is, it needs to be freed when the
- * diagnostic is freed.
- */
- bool owned;
-
- /**
- * The level of the diagnostic, see `pm_error_level_t` and
- * `pm_warning_level_t` for possible values.
- */
- uint8_t level;
-} pm_diagnostic_t;
-
-/**
- * The levels of errors generated during parsing.
- */
-typedef enum {
- /** For errors that should raise a syntax error. */
- PM_ERROR_LEVEL_SYNTAX = 0,
-
- /** For errors that should raise an argument error. */
- PM_ERROR_LEVEL_ARGUMENT = 1,
-
- /** For errors that should raise a load error. */
- PM_ERROR_LEVEL_LOAD = 2
-} pm_error_level_t;
-
-/**
- * The levels of warnings generated during parsing.
- */
-typedef enum {
- /** For warnings which should be emitted if $VERBOSE != nil. */
- PM_WARNING_LEVEL_DEFAULT = 0,
-
- /** For warnings which should be emitted if $VERBOSE == true. */
- PM_WARNING_LEVEL_VERBOSE = 1
-} pm_warning_level_t;
-
-/**
- * Get the human-readable name of the given diagnostic ID.
- *
- * @param diag_id The diagnostic ID.
- * @return The human-readable name of the diagnostic ID.
- */
-const char * pm_diagnostic_id_human(pm_diagnostic_id_t diag_id);
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using shared
- * memory for its message.
- *
- * @param list The list to append to.
- * @param start The start of the diagnostic.
- * @param end The end of the diagnostic.
- * @param diag_id The diagnostic ID.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
-
-/**
- * Append a diagnostic to the given list of diagnostics that is using a format
- * string for its message.
- *
- * @param list The list to append to.
- * @param start The start of the diagnostic.
- * @param end The end of the diagnostic.
- * @param diag_id The diagnostic ID.
- * @param ... The arguments to the format string for the message.
- * @return Whether the diagnostic was successfully appended.
- */
-bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...);
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- *
- * @param list The list to deallocate.
- */
-void pm_diagnostic_list_free(pm_list_t *list);
-
-#endif
diff --git a/prism/templates/include/prism/internal/diagnostic.h.erb b/prism/templates/include/prism/internal/diagnostic.h.erb
new file mode 100644
index 0000000000..ee44ff5382
--- /dev/null
+++ b/prism/templates/include/prism/internal/diagnostic.h.erb
@@ -0,0 +1,60 @@
+#ifndef PRISM_INTERNAL_DIAGNOSTIC_H
+#define PRISM_INTERNAL_DIAGNOSTIC_H
+
+#include "prism/internal/list.h"
+
+#include "prism/arena.h"
+#include "prism/diagnostic.h"
+
+/*
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ */
+typedef enum {
+ /* These are the error diagnostics. */
+ <%- errors.each do |error| -%>
+ PM_ERR_<%= error.name %>,
+ <%- end -%>
+
+ /* These are the warning diagnostics. */
+ <%- warnings.each do |warning| -%>
+ PM_WARN_<%= warning.name %>,
+ <%- end -%>
+} pm_diagnostic_id_t;
+
+/*
+ * This struct represents a diagnostic generated during parsing.
+ */
+struct pm_diagnostic_t {
+ /* The embedded base node. */
+ pm_list_node_t node;
+
+ /* The location of the diagnostic in the source. */
+ pm_location_t location;
+
+ /* The ID of the diagnostic. */
+ pm_diagnostic_id_t diag_id;
+
+ /* The message associated with the diagnostic. */
+ const char *message;
+
+ /*
+ * The level of the diagnostic, see `pm_error_level_t` and
+ * `pm_warning_level_t` for possible values.
+ */
+ uint8_t level;
+};
+
+/*
+ * Append a diagnostic to the given list of diagnostics that is using shared
+ * memory for its message.
+ */
+void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id);
+
+/*
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ */
+void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...);
+
+#endif
diff --git a/prism/templates/lib/prism/compiler.rb.erb b/prism/templates/lib/prism/compiler.rb.erb
index 45ed88d8de..13317cac04 100644
--- a/prism/templates/lib/prism/compiler.rb.erb
+++ b/prism/templates/lib/prism/compiler.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
module Prism
# A compiler is a visitor that returns the value of each node as it visits.
# This is as opposed to a visitor which will only walk the tree. This can be
@@ -18,24 +21,32 @@ module Prism
#
class Compiler < Visitor
# Visit an individual node.
- def visit(node)
+ #--
+ #: (node?) -> untyped
+ def visit(node) # :nodoc:
node&.accept(self)
end
# Visit a list of nodes.
- def visit_all(nodes)
+ #--
+ #: (Array[node?]) -> untyped
+ def visit_all(nodes) # :nodoc:
nodes.map { |node| node&.accept(self) }
end
# Visit the child nodes of the given node.
- def visit_child_nodes(node)
- node.compact_child_nodes.map { |node| node.accept(self) }
+ #--
+ #: (node) -> Array[untyped]
+ def visit_child_nodes(node) # :nodoc:
+ node.each_child_node.map { |node| node.accept(self) }
end
<%- nodes.each_with_index do |node, index| -%>
<%= "\n" if index != 0 -%>
- # Compile a <%= node.name %> node
- alias visit_<%= node.human %> visit_child_nodes
+ #: (<%= node.name %>) -> Array[untyped]
+ def visit_<%= node.human %>(node) # :nodoc:
+ node.each_child_node.map { |node| node.accept(self) }
+ end
<%- end -%>
end
end
diff --git a/prism/templates/lib/prism/dispatcher.rb.erb b/prism/templates/lib/prism/dispatcher.rb.erb
index 52478451c9..5991b0c904 100644
--- a/prism/templates/lib/prism/dispatcher.rb.erb
+++ b/prism/templates/lib/prism/dispatcher.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
module Prism
# The dispatcher class fires events for nodes that are found while walking an
# AST to all registered listeners. It's useful for performing different types
@@ -32,50 +35,52 @@ module Prism
# dispatcher.dispatch_once(integer)
#
class Dispatcher < Visitor
- # attr_reader listeners: Hash[Symbol, Array[Listener]]
- attr_reader :listeners
+ # A hash mapping event names to arrays of listeners that should be notified
+ # when that event is fired.
+ attr_reader :listeners #: Hash[Symbol, Array[untyped]]
# Initialize a new dispatcher.
+ #--
+ #: () -> void
def initialize
@listeners = {}
end
# Register a listener for one or more events.
- #
- # def register: (Listener, *Symbol) -> void
+ #--
+ #: (untyped, *Symbol) -> void
def register(listener, *events)
register_events(listener, events)
end
# Register all public methods of a listener that match the pattern
# `on_<node_name>_(enter|leave)`.
- #
- # def register_public_methods: (Listener) -> void
+ #--
+ #: (untyped) -> void
def register_public_methods(listener)
register_events(listener, listener.public_methods(false).grep(/\Aon_.+_(?:enter|leave)\z/))
end
# Register a listener for the given events.
- private def register_events(listener, events)
+ #--
+ #: (untyped, Array[Symbol]) -> void
+ private def register_events(listener, events) # :nodoc:
events.each { |event| (listeners[event] ||= []) << listener }
end
# Walks `root` dispatching events to all registered listeners.
- #
- # def dispatch: (Node) -> void
alias dispatch visit
# Dispatches a single event for `node` to all registered listeners.
- #
- # def dispatch_once: (Node) -> void
+ #--
+ #: (node node) -> void
def dispatch_once(node)
node.accept(DispatchOnce.new(listeners))
end
<%- nodes.each do |node| -%>
- # Dispatch enter and leave events for <%= node.name %> nodes and continue
- # walking the tree.
- def visit_<%= node.human %>(node)
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node) # :nodoc:
listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
super
listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
@@ -83,14 +88,17 @@ module Prism
<%- end -%>
class DispatchOnce < Visitor # :nodoc:
- attr_reader :listeners
+ attr_reader :listeners #: Hash[Symbol, Array[untyped]]
+ #: (Hash[Symbol, Array[untyped]] listeners) -> void
def initialize(listeners)
@listeners = listeners
end
<%- nodes.each do |node| -%>
# Dispatch enter and leave events for <%= node.name %> nodes.
+ #--
+ #: (<%= node.name %> node) -> void
def visit_<%= node.human %>(node)
listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb
index e9c81e4545..88ef1e1f36 100644
--- a/prism/templates/lib/prism/dot_visitor.rb.erb
+++ b/prism/templates/lib/prism/dot_visitor.rb.erb
@@ -1,18 +1,26 @@
-require "cgi"
+#--
+# rbs_inline: enabled
+
+require "cgi/escape"
+require "cgi/util" unless defined?(CGI::EscapeExt)
module Prism
# This visitor provides the ability to call Node#to_dot, which converts a
# subtree into a graphviz dot graph.
class DotVisitor < Visitor
class Field # :nodoc:
- attr_reader :name, :value, :port
+ attr_reader :name #: String
+ attr_reader :value #: String?
+ attr_reader :port #: bool
+ #: (String name, String? value, bool port) -> void
def initialize(name, value, port)
@name = name
@value = value
@port = port
end
+ #: () -> String
def to_dot
if port
"<tr><td align=\"left\" colspan=\"2\" port=\"#{name}\">#{name}</td></tr>"
@@ -23,17 +31,21 @@ module Prism
end
class Table # :nodoc:
- attr_reader :name, :fields
+ attr_reader :name #: String
+ attr_reader :fields #: Array[Field]
+ #: (String name) -> void
def initialize(name)
@name = name
@fields = []
end
+ #: (String name, ?String? value, ?port: bool) -> void
def field(name, value = nil, port: false)
fields << Field.new(name, value, port)
end
+ #: () -> String
def to_dot
dot = <<~DOT
<table border="0" cellborder="1" cellspacing="0" cellpadding="4">
@@ -49,26 +61,31 @@ module Prism
end
class Digraph # :nodoc:
- attr_reader :nodes, :waypoints, :edges
+ attr_reader :nodes, :waypoints, :edges #: Array[String]
+ #: () -> void
def initialize
@nodes = []
@waypoints = []
@edges = []
end
+ #: (String value) -> void
def node(value)
nodes << value
end
+ #: (String value) -> void
def waypoint(value)
waypoints << value
end
+ #: (String value) -> void
def edge(value)
edges << value
end
+ #: () -> String
def to_dot
<<~DOT
digraph "Prism" {
@@ -92,21 +109,25 @@ module Prism
private_constant :Field, :Table, :Digraph
# The digraph that is being built.
- attr_reader :digraph
+ attr_reader :digraph #: Digraph
# Initialize a new dot visitor.
+ #--
+ #: () -> void
def initialize
@digraph = Digraph.new
end
# Convert this visitor into a graphviz dot graph string.
+ #--
+ #: () -> String
def to_dot
digraph.to_dot
end
<%- nodes.each do |node| -%>
- # Visit a <%= node.name %> node.
- def visit_<%= node.human %>(node)
+ #: (<%= node.name %>) -> void
+ def visit_<%= node.human %>(node) # :nodoc:
table = Table.new("<%= node.name %>")
id = node_id(node)
<%- if (node_flags = node.flags) -%>
@@ -151,7 +172,7 @@ module Prism
<%- end -%>
<%- end -%>
- digraph.nodes << <<~DOT
+ digraph.node(<<~DOT)
#{id} [
label=<#{table.to_dot.gsub(/\n/, "\n ")}>
];
@@ -164,19 +185,25 @@ module Prism
private
# Generate a unique node ID for a node throughout the digraph.
- def node_id(node)
+ #--
+ #: (node) -> String
+ def node_id(node) # :nodoc:
"Node_#{node.object_id}"
end
- # Inspect a location to display the start and end line and column numbers.
- def location_inspect(location)
+ # Inspect a location to display the start and end line and columns in bytes.
+ #--
+ #: (Location) -> String
+ def location_inspect(location) # :nodoc:
"(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})"
end
<%- flags.each do |flag| -%>
# Inspect a node that has <%= flag.human %> flags to display the flags as a
# comma-separated list.
- def <%= flag.human %>_inspect(node)
+ #--
+ #: (<%= nodes.filter_map { |node| node.name if node.flags == flag }.join(" | ") %> node) -> String
+ def <%= flag.human %>_inspect(node) # :nodoc:
flags = [] #: Array[String]
<%- flag.values.each do |value| -%>
flags << "<%= value.name.downcase %>" if node.<%= value.name.downcase %>?
diff --git a/prism/templates/lib/prism/dsl.rb.erb b/prism/templates/lib/prism/dsl.rb.erb
index e16ebb7110..be7dc6d9c1 100644
--- a/prism/templates/lib/prism/dsl.rb.erb
+++ b/prism/templates/lib/prism/dsl.rb.erb
@@ -1,8 +1,11 @@
+#--
+# rbs_inline: enabled
+
module Prism
# The DSL module provides a set of methods that can be used to create prism
# nodes in a more concise manner. For example, instead of writing:
#
- # source = Prism::Source.for("[1]")
+ # source = Prism::Source.for("[1]", 1, [0])
#
# Prism::ArrayNode.new(
# source,
@@ -56,17 +59,31 @@ module Prism
extend self
# Create a new Source object.
+ #--
+ #: (String string) -> Source
def source(string)
- Source.for(string)
+ Source.for(string, 1, build_offsets(string))
end
# Create a new Location object.
+ #--
+ #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location
def location(source: default_source, start_offset: 0, length: 0)
Location.new(source, start_offset, length)
end
<%- nodes.each do |node| -%>
+ <%-
+ params = [
+ ["source", "Source"],
+ ["node_id", "Integer"],
+ ["location", "Location"],
+ ["flags", "Integer"]
+ ].concat(node.fields.map { |field| [field.name, field.rbs_class] })
+ -%>
# Create a new <%= node.name %> node.
+ #--
+ #: (<%= params.map { |(name, type)| "?#{name}: #{type}" }.join(", ") %>) -> <%= node.name %>
def <%= node.human %>(<%= ["source: default_source", "node_id: 0", "location: default_location", "flags: 0", *node.fields.map { |field|
case field
when Prism::Template::NodeField
@@ -100,6 +117,8 @@ module Prism
<%- flags.each do |flag| -%>
# Retrieve the value of one of the <%= flag.name %> flags.
+ #--
+ #: (Symbol name) -> Integer
def <%= flag.human.chomp("s") %>(name)
case name
<%- flag.values.each do |value| -%>
@@ -114,20 +133,40 @@ module Prism
# The default source object that gets attached to nodes and locations if no
# source is specified.
+ #--
+ #: () -> Source
def default_source
- Source.for("")
+ Source.for("", 1, [0])
end
# The default location object that gets attached to nodes if no location is
# specified, which uses the given source.
+ #--
+ #: () -> Location
def default_location
Location.new(default_source, 0, 0)
end
# The default node that gets attached to nodes if no node is specified for a
# required node field.
+ #--
+ #: (Source source, Location location) -> node
def default_node(source, location)
- MissingNode.new(source, -1, location, 0)
+ ErrorRecoveryNode.new(source, -1, location, 0, nil)
+ end
+
+ private
+
+ # Build the newline byte offset array for the given source string.
+ #--
+ #: (String source) -> Array[Integer]
+ def build_offsets(source)
+ offsets = [0]
+ start = 0
+ while (index = source.byteindex("\n", start))
+ offsets << (start = index + 1)
+ end
+ offsets
end
end
end
diff --git a/prism/templates/lib/prism/inspect_visitor.rb.erb b/prism/templates/lib/prism/inspect_visitor.rb.erb
index 3cfe615d85..820f5ae75f 100644
--- a/prism/templates/lib/prism/inspect_visitor.rb.erb
+++ b/prism/templates/lib/prism/inspect_visitor.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
module Prism
# This visitor is responsible for composing the strings that get returned by
# the various #inspect methods defined on each of the nodes.
@@ -7,8 +10,9 @@ module Prism
# when we hit an element in that list. In this case, we have a special
# command that replaces the subsequent indent with the given value.
class Replace # :nodoc:
- attr_reader :value
+ attr_reader :value #: String
+ #: (String value) -> void
def initialize(value)
@value = value
end
@@ -17,19 +21,25 @@ module Prism
private_constant :Replace
# The current prefix string.
- attr_reader :indent
+ # :stopdoc:
+ attr_reader :indent #: String
+ # :startdoc:
# The list of commands that we need to execute in order to compose the
# final string.
- attr_reader :commands
+ # :stopdoc:
+ attr_reader :commands #: Array[[String | node | Replace, String]]
+ # :startdoc:
- # Initializes a new instance of the InspectVisitor.
- def initialize(indent = +"")
+ #: (?String indent) -> void
+ def initialize(indent = +"") # :nodoc:
@indent = indent
@commands = []
end
# Compose an inspect string for the given node.
+ #--
+ #: (node node) -> String
def self.compose(node)
visitor = new
node.accept(visitor)
@@ -37,7 +47,9 @@ module Prism
end
# Compose the final string.
- def compose
+ #--
+ #: () -> String
+ def compose # :nodoc:
buffer = +""
replace = nil
@@ -66,8 +78,8 @@ module Prism
end
<%- nodes.each do |node| -%>
- # Inspect a <%= node.name %> node.
- def visit_<%= node.human %>(node)
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node) # :nodoc:
commands << [inspect_node(<%= node.name.inspect %>, node), indent]
<%- (fields = [node.flags || Prism::Template::Flags.empty, *node.fields]).each_with_index do |field, index| -%>
<%- pointer = index == fields.length - 1 ? "└── " : "├── " -%>
@@ -114,13 +126,17 @@ module Prism
private
# Compose a header for the given node.
- def inspect_node(name, node)
+ #--
+ #: (String name, node node) -> String
+ def inspect_node(name, node) # :nodoc:
location = node.location
"@ #{name} (location: (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}))\n"
end
# Compose a string representing the given inner location field.
- def inspect_location(location)
+ #--
+ #: (Location? location) -> String
+ def inspect_location(location) # :nodoc:
if location
"(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}) = #{location.slice.inspect}"
else
diff --git a/prism/templates/lib/prism/mutation_compiler.rb.erb b/prism/templates/lib/prism/mutation_compiler.rb.erb
index 565ee4e315..2d555048d2 100644
--- a/prism/templates/lib/prism/mutation_compiler.rb.erb
+++ b/prism/templates/lib/prism/mutation_compiler.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
module Prism
# This visitor walks through the tree and copies each node as it is being
# visited. This is useful for consumers that want to mutate the tree, as you
@@ -5,8 +8,8 @@ module Prism
class MutationCompiler < Compiler
<%- nodes.each_with_index do |node, index| -%>
<%= "\n" if index != 0 -%>
- # Copy a <%= node.name %> node
- def visit_<%= node.human %>(node)
+ #: (<%= node.name %>) -> node?
+ def visit_<%= node.human %>(node) # :nodoc:
<%- fields = node.fields.select { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
<%- if fields.any? -%>
node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::Template::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb
index ceee2b0ffe..fb13051aba 100644
--- a/prism/templates/lib/prism/node.rb.erb
+++ b/prism/templates/lib/prism/node.rb.erb
@@ -1,24 +1,49 @@
+#--
+# rbs_inline: enabled
+
module Prism
+ # @rbs!
+ # interface _Repository
+ # def enter: (Integer node_id, Symbol field_name) -> Relocation::Entry
+ # end
+ #
+ # interface _Node
+ # def deconstruct: () -> Array[Prism::node?]
+ # def inspect: () -> String
+ # end
+ #
+ # type node = Node & _Node
+
# This represents a node in the tree. It is the parent class of all of the
# various node types.
class Node
# A pointer to the source that this node was created from.
- attr_reader :source
+ # :stopdoc:
+ attr_reader :source #: Source
private :source
+ # :startdoc:
# A unique identifier for this node. This is used in a very specific
# use case where you want to keep around a reference to a node without
# having to keep around the syntax tree in memory. This unique identifier
# will be consistent across multiple parses of the same source code.
- attr_reader :node_id
+ attr_reader :node_id #: Integer
+
+ # The location associated with this node. For lazily loading Location
+ # objects, we keep it as a packed integer until it is accessed.
+ # @rbs @location: Location | Integer
# Save this node using a saved source so that it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry
def save(repository)
repository.enter(node_id, :itself)
end
# A Location instance that represents the location of this node in the
# source.
+ #--
+ #: () -> Location
def location
location = @location
return location if location.is_a?(Location)
@@ -26,104 +51,151 @@ module Prism
end
# Save the location using a saved source so that it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry
def save_location(repository)
repository.enter(node_id, :location)
end
- # Delegates to the start_line of the associated location object.
+ # --------------------------------------------------------------------------
+ # :section: Location Delegators
+ # These methods provide convenient access to the underlying Location object.
+ # --------------------------------------------------------------------------
+
+ # Delegates to [`start_line`](rdoc-ref:Location#start_line) of the associated location object.
+ #--
+ #: () -> Integer
def start_line
location.start_line
end
- # Delegates to the end_line of the associated location object.
+ # Delegates to [`end_line`](rdoc-ref:Location#end_line) of the associated location object.
+ #--
+ #: () -> Integer
def end_line
location.end_line
end
- # The start offset of the node in the source. This method is effectively a
- # delegate method to the location object.
+ # Delegates to [`start_offset`](rdoc-ref:Location#start_offset) of the associated location object.
+ #--
+ #: () -> Integer
def start_offset
location = @location
location.is_a?(Location) ? location.start_offset : location >> 32
end
- # The end offset of the node in the source. This method is effectively a
- # delegate method to the location object.
+ # Delegates to [`end_offset`](rdoc-ref:Location#end_offset) of the associated location object.
+ #--
+ #: () -> Integer
def end_offset
location = @location
location.is_a?(Location) ? location.end_offset : ((location >> 32) + (location & 0xFFFFFFFF))
end
- # Delegates to the start_character_offset of the associated location object.
+ # Delegates to [`start_character_offset`](rdoc-ref:Location#start_character_offset)
+ # of the associated location object.
+ #--
+ #: () -> Integer
def start_character_offset
location.start_character_offset
end
- # Delegates to the end_character_offset of the associated location object.
+ # Delegates to [`end_character_offset`](rdoc-ref:Location#end_character_offset)
+ # of the associated location object.
+ #--
+ #: () -> Integer
def end_character_offset
location.end_character_offset
end
- # Delegates to the cached_start_code_units_offset of the associated location
- # object.
+ # Delegates to [`cached_start_code_units_offset`](rdoc-ref:Location#cached_start_code_units_offset)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_start_code_units_offset(cache)
location.cached_start_code_units_offset(cache)
end
- # Delegates to the cached_end_code_units_offset of the associated location
- # object.
+ # Delegates to [`cached_end_code_units_offset`](rdoc-ref:Location#cached_end_code_units_offset)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_end_code_units_offset(cache)
location.cached_end_code_units_offset(cache)
end
- # Delegates to the start_column of the associated location object.
+ # Delegates to [`start_column`](rdoc-ref:Location#start_column) of the associated location object.
+ #--
+ #: () -> Integer
def start_column
location.start_column
end
- # Delegates to the end_column of the associated location object.
+ # Delegates to [`end_column`](rdoc-ref:Location#end_column) of the associated location object.
+ #--
+ #: () -> Integer
def end_column
location.end_column
end
- # Delegates to the start_character_column of the associated location object.
+ # Delegates to [`start_character_column`](rdoc-ref:Location#start_character_column)
+ # of the associated location object.
+ #--
+ #: () -> Integer
def start_character_column
location.start_character_column
end
- # Delegates to the end_character_column of the associated location object.
+ # Delegates to [`end_character_column`](rdoc-ref:Location#end_character_column)
+ # of the associated location object.
+ #--
+ #: () -> Integer
def end_character_column
location.end_character_column
end
- # Delegates to the cached_start_code_units_column of the associated location
- # object.
+ # Delegates to [`cached_start_code_units_column`](rdoc-ref:Location#cached_start_code_units_column)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_start_code_units_column(cache)
location.cached_start_code_units_column(cache)
end
- # Delegates to the cached_end_code_units_column of the associated location
- # object.
+ # Delegates to [`cached_end_code_units_column`](rdoc-ref:Location#cached_end_code_units_column)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_end_code_units_column(cache)
location.cached_end_code_units_column(cache)
end
- # Delegates to the leading_comments of the associated location object.
+ # Delegates to [`leading_comments`](rdoc-ref:Location#leading_comments) of the associated location object.
+ #--
+ #: () -> Array[Comment]
def leading_comments
location.leading_comments
end
- # Delegates to the trailing_comments of the associated location object.
+ # Delegates to [`trailing_comments`](rdoc-ref:Location#trailing_comments) of the associated location object.
+ #--
+ #: () -> Array[Comment]
def trailing_comments
location.trailing_comments
end
- # Delegates to the comments of the associated location object.
+ # Delegates to [`comments`](rdoc-ref:Location#comments) of the associated location object.
+ #--
+ #: () -> Array[Comment]
def comments
location.comments
end
+ # :section:
+
# Returns all of the lines of the source code associated with this node.
+ #--
+ #: () -> Array[String]
def source_lines
location.source_lines
end
@@ -133,6 +205,8 @@ module Prism
alias script_lines source_lines
# Slice the location of the node from the source.
+ #--
+ #: () -> String
def slice
location.slice
end
@@ -140,28 +214,38 @@ module Prism
# Slice the location of the node from the source, starting at the beginning
# of the line that the location starts on, ending at the end of the line
# that the location ends on.
+ #--
+ #: () -> String
def slice_lines
location.slice_lines
end
# An bitset of flags for this node. There are certain flags that are common
# for all nodes, and then some nodes have specific flags.
- attr_reader :flags
+ # :stopdoc:
+ attr_reader :flags #: Integer
protected :flags
+ # :startdoc:
# Returns true if the node has the newline flag set.
+ #--
+ #: () -> bool
def newline?
flags.anybits?(NodeFlags::NEWLINE)
end
# Returns true if the node has the static literal flag set.
+ #--
+ #: () -> bool
def static_literal?
flags.anybits?(NodeFlags::STATIC_LITERAL)
end
# Similar to inspect, but respects the current level of indentation given by
# the pretty print object.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.seplist(inspect.chomp.each_line, -> { q.breakable }) do |line|
q.text(line.chomp)
end
@@ -169,6 +253,8 @@ module Prism
end
# Convert this node into a graphviz dot graph string.
+ #--
+ #: () -> String
def to_dot
# @type self: node
DotVisitor.new.tap { |visitor| accept(visitor) }.to_dot
@@ -180,28 +266,18 @@ module Prism
#
# Important to note is that the column given to this method should be in
# bytes, as opposed to characters or code units.
+ #--
+ #: (Integer line, Integer column) -> Array[node]
def tunnel(line, column)
- queue = [self] #: Array[Prism::node]
- result = [] #: Array[Prism::node]
+ queue = [self] #: Array[node]
+ result = [] #: Array[node]
+ offset = source.byte_offset(line, column)
while (node = queue.shift)
result << node
- node.compact_child_nodes.each do |child_node|
- child_location = child_node.location
-
- start_line = child_location.start_line
- end_line = child_location.end_line
-
- if start_line == end_line
- if line == start_line && column >= child_location.start_column && column < child_location.end_column
- queue << child_node
- break
- end
- elsif (line == start_line && column >= child_location.start_column) || (line == end_line && column < child_location.end_column)
- queue << child_node
- break
- elsif line > start_line && line < end_line
+ node.each_child_node do |child_node|
+ if child_node.start_offset <= offset && offset < child_node.end_offset
queue << child_node
break
end
@@ -212,13 +288,14 @@ module Prism
end
# Returns the first node that matches the given block when visited in a
- # depth-first search. This is useful for finding a node that matches a
+ # breadth-first search. This is useful for finding a node that matches a
# particular condition.
#
# node.breadth_first_search { |node| node.node_id == node_id }
- #
- def breadth_first_search(&block)
- queue = [self] #: Array[Prism::node]
+ #--
+ #: () { (node) -> bool } -> node?
+ def breadth_first_search(&blk)
+ queue = [self] #: Array[node]
while (node = queue.shift)
return node if yield node
@@ -227,10 +304,33 @@ module Prism
nil
end
+ alias find breadth_first_search
+
+ # Returns all of the nodes that match the given block when visited in a
+ # breadth-first search. This is useful for finding all nodes that match a
+ # particular condition.
+ #
+ # node.breadth_first_search_all { |node| node.is_a?(Prism::CallNode) }
+ #--
+ #: () { (node) -> bool } -> Array[node]
+ def breadth_first_search_all(&blk)
+ queue = [self] #: Array[node]
+ results = [] #: Array[node]
+
+ while (node = queue.shift)
+ results << node if yield node
+ queue.concat(node.compact_child_nodes)
+ end
+
+ results
+ end
+ alias find_all breadth_first_search_all
# Returns a list of the fields that exist for this node class. Fields
# describe the structure of the node. This kind of reflection is useful for
# things like recursively visiting each node _and_ field in the tree.
+ #--
+ #: () -> Array[Reflection::Field]
def self.fields
# This method should only be called on subclasses of Node, not Node
# itself.
@@ -240,38 +340,57 @@ module Prism
end
# --------------------------------------------------------------------------
- # :section: Node interface
- # These methods are effectively abstract methods that must be implemented by
- # the various subclasses of Node. They are here to make it easier to work
- # with typecheckers.
+ # :section: Node Interface
+ # These methods are effectively abstract methods that are implemented by
+ # the various subclasses of Node.
# --------------------------------------------------------------------------
# Accepts a visitor and calls back into the specialized visit function.
+ #--
+ #: (_Visitor visitor) -> untyped
def accept(visitor)
raise NoMethodError, "undefined method `accept' for #{inspect}"
end
# Returns an array of child nodes, including `nil`s in the place of optional
# nodes that were not present.
+ #--
+ #: () -> Array[node?]
def child_nodes
raise NoMethodError, "undefined method `child_nodes' for #{inspect}"
end
alias deconstruct child_nodes
+ # With a block given, yields each child node. Without a block, returns
+ # an enumerator that contains each child node. Excludes any `nil`s in
+ # the place of optional nodes that were not present.
+ #--
+ #: () -> Enumerator[node, void]
+ #: () { (node) -> void } -> void
+ def each_child_node(&blk)
+ raise NoMethodError, "undefined method `each_child_node' for #{inspect}"
+ end
+
# Returns an array of child nodes, excluding any `nil`s in the place of
# optional nodes that were not present.
+ #--
+ #: () -> Array[node]
def compact_child_nodes
raise NoMethodError, "undefined method `compact_child_nodes' for #{inspect}"
end
# Returns an array of child nodes and locations that could potentially have
# comments attached to them.
+ #--
+ #: () -> Array[node | Location]
def comment_targets
raise NoMethodError, "undefined method `comment_targets' for #{inspect}"
end
# Returns a string representation of the node.
+ #--
+ #: () -> String
def inspect
raise NoMethodError, "undefined method `inspect' for #{inspect}"
end
@@ -288,6 +407,8 @@ module Prism
# it uses a single integer comparison, but also because if you're on CRuby
# you can take advantage of the fact that case statements with all symbol
# keys will use a jump table.
+ #--
+ #: () -> Symbol
def type
raise NoMethodError, "undefined method `type' for #{inspect}"
end
@@ -296,6 +417,8 @@ module Prism
# splitting on the type of the node without having to do a long === chain.
# Note that like #type, it will still be slower than using == for a single
# class, but should be faster in a case statement or an array comparison.
+ #--
+ #: () -> Symbol
def self.type
raise NoMethodError, "undefined method `type' for #{inspect}"
end
@@ -306,7 +429,13 @@ module Prism
#<%= line %>
<%- end -%>
class <%= node.name -%> < Node
+ <%- node.fields.each do |field| -%>
+ # @rbs @<%= field.name %>: <%= field.rbs_class %>
+ <%- end -%>
+
# Initialize a new <%= node.name %> node.
+ #--
+ #: (Source source, Integer node_id, Location location, Integer flags, <%= node.fields.map { |field| "#{field.rbs_class} #{field.name}" }.join(", ") %>) -> void
def initialize(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
@source = source
@node_id = node_id
@@ -320,12 +449,27 @@ module Prism
<%- end -%>
end
- # def accept: (Visitor visitor) -> void
+ # ---------
+ # :section: Repository
+ # Methods related to Relocation.
+ # ---------
+
+ # ----------------------------------------------------------------------------------
+ # :section: Node Interface
+ # These methods are present on all subclasses of Node.
+ # Read the [node interface docs](Node.html#node-interface) for more information.
+ # ----------------------------------------------------------------------------------
+
+ # See Node.accept.
+ #--
+ #: (_Visitor visitor) -> untyped
def accept(visitor)
visitor.visit_<%= node.human %>(self)
end
- # def child_nodes: () -> Array[Node?]
+ # See Node.child_nodes.
+ #--
+ #: () -> Array[node?]
def child_nodes
[<%= node.fields.map { |field|
case field
@@ -335,7 +479,28 @@ module Prism
}.compact.join(", ") %>]
end
- # def compact_child_nodes: () -> Array[Node]
+ # See Node.each_child_node.
+ #--
+ #: () -> Enumerator[node, void]
+ #: () { (node) -> void } -> void
+ def each_child_node(&blk)
+ return to_enum(:each_child_node) unless block_given?
+
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ yield <%= field.name %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (<%= field.name %> = self.<%= field.name %>); yield <%= field.name %>; end
+ <%- when Prism::Template::NodeListField -%>
+ <%= field.name %>.each { |node| yield node }
+ <%- end -%>
+ <%- end -%>
+ end
+
+ # See Node.compact_child_nodes.
+ #--
+ #: () -> Array[node]
def compact_child_nodes
<%- if node.fields.any? { |field| field.is_a?(Prism::Template::OptionalNodeField) } -%>
compact = [] #: Array[Prism::node]
@@ -344,7 +509,7 @@ module Prism
<%- when Prism::Template::NodeField -%>
compact << <%= field.name %>
<%- when Prism::Template::OptionalNodeField -%>
- compact << <%= field.name %> if <%= field.name %>
+ if (<%= field.name %> = self.<%= field.name %>); compact << <%= field.name %>; end
<%- when Prism::Template::NodeListField -%>
compact.concat(<%= field.name %>)
<%- end -%>
@@ -360,7 +525,9 @@ module Prism
<%- end -%>
end
- # def comment_targets: () -> Array[Node | Location]
+ # See Node.comment_targets.
+ #--
+ #: () -> Array[node | Location]
def comment_targets
[<%= node.fields.map { |field|
case field
@@ -370,50 +537,101 @@ module Prism
}.compact.join(", ") %>] #: Array[Prism::node | Location]
end
- # def copy: (<%= (["?node_id: Integer", "?location: Location", "?flags: Integer"] + node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }).join(", ") %>) -> <%= node.name %>
+ # :call-seq:
+ # copy(**fields) -> <%= node.name %>
+ #
+ # Creates a copy of self with the given fields, using self as the template.
+ #--
+ #: (?node_id: Integer, ?location: Location, ?flags: Integer, <%= node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }.join(", ") %>) -> <%= node.name %>
def copy(<%= (["node_id", "location", "flags"] + node.fields.map(&:name)).map { |field| "#{field}: self.#{field}" }.join(", ") %>)
<%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
end
- # def deconstruct: () -> Array[Node?]
alias deconstruct child_nodes
- # def deconstruct_keys: (Array[Symbol] keys) -> { <%= (["node_id: Integer", "location: Location"] + node.fields.map { |field| "#{field.name}: #{field.rbs_class}" }).join(", ") %> }
- def deconstruct_keys(keys)
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ <%= (["node_id: node_id", "location: location"] + node.fields.map { |field| "#{field.name}: #{field.name}" }).join(", ") %> }
end
+
+ # See `Node#type`.
+ #--
+ #: () -> :<%= node.human %>
+ def type
+ :<%= node.human %>
+ end
+
+ # See `Node.type`.
+ #--
+ #: () -> :<%= node.human %>
+ def self.type
+ :<%= node.human %>
+ end
+
+ #: () -> String
+ def inspect # :nodoc:
+ InspectVisitor.compose(self)
+ end
+
+ # :section:
+
<%- if (node_flags = node.flags) -%>
<%- node_flags.values.each do |value| -%>
-
- # def <%= value.name.downcase %>?: () -> bool
+ # :category: Flags
+ # <%= value.comment %>
+ #--
+ #: () -> bool
def <%= value.name.downcase %>?
flags.anybits?(<%= node_flags.name %>::<%= value.name %>)
end
+
<%- end -%>
<%- end -%>
<%- node.fields.each do |field| -%>
-
+ <%- case field -%>
+ <%- when Prism::Template::LocationField -%>
+ # :category: Locations
+ # :call-seq:
+ # <%= field.name %> -> <%= field.call_seq_type %>
+ #
<%- if field.comment.nil? -%>
- # attr_reader <%= field.name %>: <%= field.rbs_class %>
+ # Returns the Location represented by `<%= field.name %>`.
<%- else -%>
<%- field.each_comment_line do |line| -%>
#<%= line %>
<%- end -%>
<%- end -%>
- <%- case field -%>
- <%- when Prism::Template::LocationField -%>
+ #--
+ #: () -> Location
def <%= field.name %>
location = @<%= field.name %>
return location if location.is_a?(Location)
@<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
end
+ # :category: Repository
# Save the <%= field.name %> location using the given saved source so that
# it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry
def save_<%= field.name %>(repository)
repository.enter(node_id, :<%= field.name %>)
end
+
<%- when Prism::Template::OptionalLocationField -%>
+ # :category: Locations
+ # :call-seq:
+ # <%= field.name %> -> <%= field.call_seq_type %>
+ #
+ <%- if field.comment.nil? -%>
+ # Returns the Location represented by `<%= field.name %>`.
+ <%- else -%>
+ <%- field.each_comment_line do |line| -%>
+ #<%= line %>
+ <%- end -%>
+ <%- end -%>
+ #--
+ #: () -> Location?
def <%= field.name %>
location = @<%= field.name %>
case location
@@ -426,54 +644,69 @@ module Prism
end
end
+ # :category: Repository
# Save the <%= field.name %> location using the given saved source so that
# it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry?
def save_<%= field.name %>(repository)
repository.enter(node_id, :<%= field.name %>) unless @<%= field.name %>.nil?
end
<%- else -%>
- attr_reader :<%= field.name %>
+ # :call-seq:
+ # <%= field.name %> -> <%= field.call_seq_type %>
+ #
+ <%- if field.comment.nil? -%>
+ # Returns the `<%= field.name %>` attribute.
+ <%- else -%>
+ <%- field.each_comment_line do |line| -%>
+ #<%= line %>
<%- end -%>
<%- end -%>
+ #--
+ #: () -> <%= field.rbs_class %>
+ def <%= field.name %>
+ @<%= field.name %>
+ end
+
+ <%- end -%>
+ <%- end -%>
+ # :section: Slicing
+
<%- node.fields.each do |field| -%>
<%- case field -%>
<%- when Prism::Template::LocationField -%>
<%- raise unless field.name.end_with?("_loc") -%>
<%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
-
- # def <%= field.name.delete_suffix("_loc") %>: () -> String
+ # :call-seq:
+ # <%= field.name.delete_suffix("_loc") %> -> String
+ #
+ # Slice the location of <%= field.name %> from the source.
+ #--
+ #: () -> String
def <%= field.name.delete_suffix("_loc") %>
<%= field.name %>.slice
end
+
<%- when Prism::Template::OptionalLocationField -%>
<%- raise unless field.name.end_with?("_loc") -%>
<%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
-
- # def <%= field.name.delete_suffix("_loc") %>: () -> String?
+ # :call-seq:
+ # <%= field.name.delete_suffix("_loc") %> -> String | nil
+ #
+ # Slice the location of <%= field.name %> from the source.
+ #--
+ #: () -> String?
def <%= field.name.delete_suffix("_loc") %>
<%= field.name %>&.slice
end
+
<%- end -%>
<%- end -%>
+ # :section:
- # def inspect -> String
- def inspect
- InspectVisitor.compose(self)
- end
-
- # Return a symbol representation of this node type. See `Node#type`.
- def type
- :<%= node.human %>
- end
-
- # Return a symbol representation of this node type. See `Node::type`.
- def self.type
- :<%= node.human %>
- end
-
- # Implements case-equality for the node. This is effectively == but without
- # comparing the value of locations. Locations are checked only for presence.
- def ===(other)
+ #: (untyped other) -> boolish
+ def ===(other) # :nodoc:
other.is_a?(<%= node.name %>)<%= " &&" if (fields = [*node.flags, *node.fields]).any? %>
<%- fields.each_with_index do |field, index| -%>
<%- if field.is_a?(Prism::Template::LocationField) || field.is_a?(Prism::Template::OptionalLocationField) -%>
diff --git a/prism/templates/lib/prism/reflection.rb.erb b/prism/templates/lib/prism/reflection.rb.erb
index 6c8b2f4d25..0012f120b2 100644
--- a/prism/templates/lib/prism/reflection.rb.erb
+++ b/prism/templates/lib/prism/reflection.rb.erb
@@ -1,3 +1,6 @@
+#--
+# rbs_inline: enabled
+
module Prism
# The Reflection module provides the ability to reflect on the structure of
# the syntax tree itself, as opposed to looking at a single syntax tree. This
@@ -7,9 +10,11 @@ module Prism
# for all other field types.
class Field
# The name of the field.
- attr_reader :name
+ attr_reader :name #: Symbol
# Initializes the field with the given name.
+ #--
+ #: (Symbol name) -> void
def initialize(name)
@name = name
end
@@ -83,9 +88,11 @@ module Prism
# the bitset should be accessed through their query methods.
class FlagsField < Field
# The names of the flags in the bitset.
- attr_reader :flags
+ attr_reader :flags #: Array[Symbol]
# Initializes the flags field with the given name and flags.
+ #--
+ #: (Symbol name, Array[Symbol] flags) -> void
def initialize(name, flags)
super(name)
@flags = flags
@@ -93,6 +100,8 @@ module Prism
end
# Returns the fields for the given node.
+ #--
+ #: (singleton(Node) node) -> Array[Field]
def self.fields_for(node)
case node.type
<%- nodes.each do |node| -%>
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index 104b60f484..a676f957af 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -1,16 +1,19 @@
+#--
+# rbs_inline: enabled
+
require "stringio"
require_relative "polyfill/unpack1"
module Prism
# A module responsible for deserializing parse results.
- module Serialize
+ module Serialize # :nodoc:
# The major version of prism that we are expecting to find in the serialized
# strings.
MAJOR_VERSION = 1
# The minor version of prism that we are expecting to find in the serialized
# strings.
- MINOR_VERSION = 4
+ MINOR_VERSION = 9
# The patch version of prism that we are expecting to find in the serialized
# strings.
@@ -20,9 +23,11 @@ module Prism
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> ParseResult
def self.load_parse(input, serialized, freeze)
input = input.dup
- source = Source.for(input)
+ source = Source.for(input, 1, [])
loader = Loader.new(source, serialized)
loader.load_header
@@ -38,16 +43,17 @@ module Prism
data_loc = loader.load_optional_location_object(freeze)
errors = loader.load_errors(encoding, freeze)
warnings = loader.load_warnings(encoding, freeze)
+ continuable = loader.load_bool
cpool_base = loader.load_uint32
cpool_size = loader.load_varuint
- constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+ constant_pool = ConstantPool.new(serialized, cpool_base, cpool_size)
- node = loader.load_node(constant_pool, encoding, freeze)
+ node = loader.load_node(constant_pool, encoding, freeze) #: ProgramNode
loader.load_constant_pool(constant_pool)
raise unless loader.eof?
- result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, source)
+ result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, continuable, source)
result.freeze if freeze
input.force_encoding(encoding)
@@ -73,8 +79,10 @@ module Prism
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> LexResult
def self.load_lex(input, serialized, freeze)
- source = Source.for(input)
+ source = Source.for(input, 1, [])
loader = Loader.new(source, serialized)
tokens = loader.load_tokens
@@ -90,9 +98,10 @@ module Prism
data_loc = loader.load_optional_location_object(freeze)
errors = loader.load_errors(encoding, freeze)
warnings = loader.load_warnings(encoding, freeze)
+ continuable = loader.load_bool
raise unless loader.eof?
- result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, source)
+ result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, continuable, source)
tokens.each do |token|
token[0].value.force_encoding(encoding)
@@ -117,8 +126,10 @@ module Prism
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> Array[Comment]
def self.load_parse_comments(input, serialized, freeze)
- source = Source.for(input)
+ source = Source.for(input, 1, [])
loader = Loader.new(source, serialized)
loader.load_header
@@ -139,8 +150,10 @@ module Prism
#
# The formatting of the source of this method is purposeful to illustrate
# the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> ParseLexResult
def self.load_parse_lex(input, serialized, freeze)
- source = Source.for(input)
+ source = Source.for(input, 1, [])
loader = Loader.new(source, serialized)
tokens = loader.load_tokens
@@ -157,17 +170,18 @@ module Prism
data_loc = loader.load_optional_location_object(freeze)
errors = loader.load_errors(encoding, freeze)
warnings = loader.load_warnings(encoding, freeze)
+ continuable = loader.load_bool
cpool_base = loader.load_uint32
cpool_size = loader.load_varuint
- constant_pool = ConstantPool.new(input, serialized, cpool_base, cpool_size)
+ constant_pool = ConstantPool.new(serialized, cpool_base, cpool_size)
- node = loader.load_node(constant_pool, encoding, freeze)
+ node = loader.load_node(constant_pool, encoding, freeze) #: ProgramNode
loader.load_constant_pool(constant_pool)
raise unless loader.eof?
- value = [node, tokens]
- result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, source)
+ value = [node, tokens] #: [ProgramNode, Array[[Token, Integer]]]
+ result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
tokens.each do |token|
token[0].value.force_encoding(encoding)
@@ -189,34 +203,36 @@ module Prism
end
class ConstantPool # :nodoc:
- attr_reader :size
+ attr_reader :size #: Integer
+
+ # @rbs @serialized: String
+ # @rbs @base: Integer
+ # @rbs @pool: Array[Symbol?]
- def initialize(input, serialized, base, size)
- @input = input
+ #: (String serialized, Integer base, Integer size) -> void
+ def initialize(serialized, base, size)
@serialized = serialized
@base = base
@size = size
@pool = Array.new(size, nil)
end
+ #: (Integer index, Encoding encoding) -> Symbol
def get(index, encoding)
@pool[index] ||=
begin
offset = @base + index * 8
- start = @serialized.unpack1("L", offset: offset)
- length = @serialized.unpack1("L", offset: offset + 4)
+ start = @serialized.unpack1("L", offset: offset) #: Integer
+ length = @serialized.unpack1("L", offset: offset + 4) #: Integer
- if start.nobits?(1 << 31)
- @input.byteslice(start, length).force_encoding(encoding).to_sym
- else
- @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(encoding).to_sym
- end
+ (@serialized.byteslice(start, length) or raise).force_encoding(encoding).to_sym
end
end
end
if RUBY_ENGINE == "truffleruby"
# StringIO is synchronized and that adds a high overhead on TruffleRuby.
+ # @rbs skip
class FastStringIO # :nodoc:
attr_accessor :pos
@@ -246,8 +262,11 @@ module Prism
end
class Loader # :nodoc:
- attr_reader :input, :io, :source
+ attr_reader :input #: String
+ attr_reader :io #: StringIO
+ attr_reader :source #: Source
+ #: (Source source, String serialized) -> void
def initialize(source, serialized)
@input = source.source.dup
raise unless serialized.encoding == Encoding::BINARY
@@ -256,40 +275,46 @@ module Prism
define_load_node_lambdas if RUBY_ENGINE != "ruby"
end
+ #: () -> bool
def eof?
io.getbyte
io.eof?
end
+ #: (ConstantPool constant_pool) -> void
def load_constant_pool(constant_pool)
trailer = 0
constant_pool.size.times do |index|
- start, length = io.read(8).unpack("L2")
- trailer += length if start.anybits?(1 << 31)
+ length = (io.read(8) or raise).unpack1("L", offset: 4) #: Integer
+ trailer += length
end
io.read(trailer)
end
+ #: () -> void
def load_header
raise "Invalid serialization" if io.read(5) != "PRISM"
- raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
+ raise "Invalid serialization" if (io.read(3) or raise).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0
end
+ #: () -> Encoding
def load_encoding
- encoding = Encoding.find(io.read(load_varuint))
+ encoding = Encoding.find((io.read(load_varuint) or raise)) or raise
@input = input.force_encoding(encoding).freeze
encoding
end
+ #: (bool freeze) -> Array[Integer]
def load_line_offsets(freeze)
offsets = Array.new(load_varuint) { load_varuint }
offsets.freeze if freeze
offsets
end
+ #: (bool freeze) -> Array[Comment]
def load_comments(freeze)
comments =
Array.new(load_varuint) do
@@ -297,6 +322,7 @@ module Prism
case load_varuint
when 0 then InlineComment.new(load_location_object(freeze))
when 1 then EmbDocComment.new(load_location_object(freeze))
+ else raise
end
comment.freeze if freeze
@@ -307,6 +333,7 @@ module Prism
comments
end
+ #: (bool freeze) -> Array[MagicComment]
def load_magic_comments(freeze)
magic_comments =
Array.new(load_varuint) do
@@ -331,10 +358,11 @@ module Prism
<%- warnings.each do |warning| -%>
<%= warning.name.downcase.to_sym.inspect %>,
<%- end -%>
- ].freeze
+ ].freeze #: Array[Symbol]
private_constant :DIAGNOSTIC_TYPES
+ #: () -> Symbol
def load_error_level
level = io.getbyte
@@ -350,13 +378,14 @@ module Prism
end
end
+ #: (Encoding encoding, bool freeze) -> Array[ParseError]
def load_errors(encoding, freeze)
errors =
Array.new(load_varuint) do
error =
ParseError.new(
DIAGNOSTIC_TYPES.fetch(load_varuint),
- load_embedded_string(encoding),
+ load_string(encoding),
load_location_object(freeze),
load_error_level
)
@@ -369,6 +398,7 @@ module Prism
errors
end
+ #: () -> Symbol
def load_warning_level
level = io.getbyte
@@ -382,13 +412,14 @@ module Prism
end
end
+ #: (Encoding encoding, bool freeze) -> Array[ParseWarning]
def load_warnings(encoding, freeze)
warnings =
Array.new(load_varuint) do
warning =
ParseWarning.new(
DIAGNOSTIC_TYPES.fetch(load_varuint),
- load_embedded_string(encoding),
+ load_string(encoding),
load_location_object(freeze),
load_warning_level
)
@@ -401,15 +432,15 @@ module Prism
warnings
end
+ #: () -> Array[[Token, Integer]]
def load_tokens
- tokens = []
+ tokens = [] #: Array[[Token, Integer]]
while (type = TOKEN_TYPES.fetch(load_varuint))
- start = load_varuint
- length = load_varuint
+ location = load_location_object(false)
+
lex_state = load_varuint
- location = Location.new(@source, start, length)
token = Token.new(@source, type, location.slice, location)
tokens << [token, lex_state]
@@ -420,25 +451,29 @@ module Prism
# variable-length integer using https://en.wikipedia.org/wiki/LEB128
# This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
+ #--
+ #: () -> Integer
def load_varuint
- n = io.getbyte
+ n = (io.getbyte or raise)
if n < 128
n
else
n -= 128
shift = 0
- while (b = io.getbyte) >= 128
+ while (b = (io.getbyte or raise)) >= 128
n += (b - 128) << (shift += 7)
end
n + (b << (shift + 7))
end
end
+ #: () -> Integer
def load_varsint
n = load_varuint
(n >> 1) ^ (-(n & 1))
end
+ #: () -> Integer
def load_integer
negative = io.getbyte != 0
length = load_varuint
@@ -450,14 +485,22 @@ module Prism
value
end
+ #: () -> Float
def load_double
- io.read(8).unpack1("D")
+ (io.read(8) or raise).unpack1("D") #: Float
end
+ #: () -> bool
+ def load_bool
+ (io.getbyte or raise) != 0
+ end
+
+ #: () -> Integer
def load_uint32
- io.read(4).unpack1("L")
+ (io.read(4) or raise).unpack1("L") #: Integer
end
+ #: (ConstantPool constant_pool, Encoding encoding, bool freeze) -> node?
def load_optional_node(constant_pool, encoding, freeze)
if io.getbyte != 0
io.pos -= 1
@@ -465,90 +508,121 @@ module Prism
end
end
- def load_embedded_string(encoding)
- io.read(load_varuint).force_encoding(encoding).freeze
- end
-
+ #: (Encoding encoding) -> String
def load_string(encoding)
- case (type = io.getbyte)
- when 1
- input.byteslice(load_varuint, load_varuint).force_encoding(encoding).freeze
- when 2
- load_embedded_string(encoding)
- else
- raise "Unknown serialized string type: #{type}"
- end
+ (io.read(load_varuint) or raise).force_encoding(encoding).freeze
end
+ #: (bool freeze) -> Location
def load_location_object(freeze)
location = Location.new(source, load_varuint, load_varuint)
location.freeze if freeze
location
end
+ # Load a location object from the serialized data. Note that we are lying
+ # about the signature a bit here, because we sometimes load it as a packed
+ # integer instead of an object.
+ #--
+ #: (bool freeze) -> Location
def load_location(freeze)
return load_location_object(freeze) if freeze
- (load_varuint << 32) | load_varuint
+ (load_varuint << 32) | load_varuint #: Location
end
+ # Load an optional location object from the serialized data if it is
+ # present. Note that we are lying about the signature a bit here, because
+ # we sometimes load it as a packed integer instead of an object.
+ #--
+ #: (bool freeze) -> Location?
def load_optional_location(freeze)
load_location(freeze) if io.getbyte != 0
end
+ #: (bool freeze) -> Location?
def load_optional_location_object(freeze)
load_location_object(freeze) if io.getbyte != 0
end
+ #: (ConstantPool constant_pool, Encoding encoding) -> Symbol
def load_constant(constant_pool, encoding)
index = load_varuint
constant_pool.get(index - 1, encoding)
end
+ #: (ConstantPool constant_pool, Encoding encoding) -> Symbol?
def load_optional_constant(constant_pool, encoding)
index = load_varuint
constant_pool.get(index - 1, encoding) if index != 0
end
if RUBY_ENGINE == "ruby"
+ #: (ConstantPool constant_pool, Encoding encoding, bool freeze) -> node
def load_node(constant_pool, encoding, freeze)
type = io.getbyte
node_id = load_varuint
- location = load_location(freeze)
- value = case type
- <%- nodes.each_with_index do |node, index| -%>
- when <%= index + 1 %> then
- <%- if node.needs_serialized_length? -%>
- load_uint32
- <%- end -%>
- <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
- case field
- when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
- when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
- when Prism::Template::StringField then "load_string(encoding)"
- when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }.tap { |nodes| nodes.freeze if freeze }"
- when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
- when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
- when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze }"
- when Prism::Template::LocationField then "load_location(freeze)"
- when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
- when Prism::Template::UInt8Field then "io.getbyte"
- when Prism::Template::UInt32Field then "load_varuint"
- when Prism::Template::IntegerField then "load_integer"
- when Prism::Template::DoubleField then "load_double"
- else raise
- end
- }].join(", ") -%>)
+ location = load_location(freeze) #: Location
+ value =
+ case type
+ <%- nodes.each_with_index do |node, index| -%>
+ when <%= index + 1 %>
+ <%- if node.needs_serialized_length? -%>
+ load_uint32
+ <%- end -%>
+ <%= node.name %>.new(
+ source,
+ node_id,
+ location,
+ load_varuint,
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ load_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ load_optional_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::StringField -%>
+ load_string(encoding),
+ <%- when Prism::Template::NodeListField -%>
+ Array.new(load_varuint) do
+ load_node(constant_pool, encoding, freeze) #: <%= field.element_rbs_class %>
+ end.tap { |nodes| nodes.freeze if freeze },
+ <%- when Prism::Template::ConstantField -%>
+ load_constant(constant_pool, encoding),
+ <%- when Prism::Template::OptionalConstantField -%>
+ load_optional_constant(constant_pool, encoding),
+ <%- when Prism::Template::ConstantListField -%>
+ Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze },
+ <%- when Prism::Template::LocationField -%>
+ load_location(freeze),
+ <%- when Prism::Template::OptionalLocationField -%>
+ load_optional_location(freeze),
+ <%- when Prism::Template::UInt8Field -%>
+ (io.getbyte or raise),
+ <%- when Prism::Template::UInt32Field -%>
+ load_varuint,
+ <%- when Prism::Template::IntegerField -%>
+ load_integer,
+ <%- when Prism::Template::DoubleField -%>
+ load_double,
+ <%- else raise -%>
+ <%- end -%>
+ <%- end -%>
+ )
<%- end -%>
- end
+ else
+ raise "Unknown node type: #{type}"
+ end
value.freeze if freeze
value
end
else
+ # @rbs skip
def load_node(constant_pool, encoding, freeze)
- @load_node_lambdas[io.getbyte].call(constant_pool, encoding, freeze)
+ @load_node_lambdas[(io.getbyte or raise)].call(constant_pool, encoding, freeze)
end
+ # @rbs skip
def define_load_node_lambdas
@load_node_lambdas = [
nil,
@@ -559,24 +633,46 @@ module Prism
<%- if node.needs_serialized_length? -%>
load_uint32
<%- end -%>
- value = <%= node.name %>.new(<%= ["source", "node_id", "location", "load_varuint", *node.fields.map { |field|
- case field
- when Prism::Template::NodeField then "load_node(constant_pool, encoding, freeze)"
- when Prism::Template::OptionalNodeField then "load_optional_node(constant_pool, encoding, freeze)"
- when Prism::Template::StringField then "load_string(encoding)"
- when Prism::Template::NodeListField then "Array.new(load_varuint) { load_node(constant_pool, encoding, freeze) }"
- when Prism::Template::ConstantField then "load_constant(constant_pool, encoding)"
- when Prism::Template::OptionalConstantField then "load_optional_constant(constant_pool, encoding)"
- when Prism::Template::ConstantListField then "Array.new(load_varuint) { load_constant(constant_pool, encoding) }"
- when Prism::Template::LocationField then "load_location(freeze)"
- when Prism::Template::OptionalLocationField then "load_optional_location(freeze)"
- when Prism::Template::UInt8Field then "io.getbyte"
- when Prism::Template::UInt32Field then "load_varuint"
- when Prism::Template::IntegerField then "load_integer"
- when Prism::Template::DoubleField then "load_double"
- else raise
- end
- }].join(", ") -%>)
+ value =
+ <%= node.name %>.new(
+ source,
+ node_id,
+ location,
+ load_varuint,
+ <%- node.fields.map do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ load_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ load_optional_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::StringField -%>
+ load_string(encoding),
+ <%- when Prism::Template::NodeListField -%>
+ Array.new(load_varuint) do
+ load_node(constant_pool, encoding, freeze) #: <%= field.element_rbs_class %>
+ end,
+ <%- when Prism::Template::ConstantField -%>
+ load_constant(constant_pool, encoding),
+ <%- when Prism::Template::OptionalConstantField -%>
+ load_optional_constant(constant_pool, encoding),
+ <%- when Prism::Template::ConstantListField -%>
+ Array.new(load_varuint) { load_constant(constant_pool, encoding) },
+ <%- when Prism::Template::LocationField -%>
+ load_location(freeze),
+ <%- when Prism::Template::OptionalLocationField -%>
+ load_optional_location(freeze),
+ <%- when Prism::Template::UInt8Field -%>
+ (io.getbyte or raise),
+ <%- when Prism::Template::UInt32Field -%>
+ load_varuint,
+ <%- when Prism::Template::IntegerField -%>
+ load_integer,
+ <%- when Prism::Template::DoubleField -%>
+ load_double,
+ <%- else raise -%>
+ <%- end -%>
+ <%- end -%>
+ )
value.freeze if freeze
value
},
@@ -584,6 +680,10 @@ module Prism
]
end
end
+
+ # @rbs!
+ # @load_node_lambdas: Array[Proc]
+ # def define_load_node_lambdas: () -> void
end
# The token types that can be indexed by their enum values.
@@ -592,7 +692,7 @@ module Prism
<%- tokens.each do |token| -%>
<%= token.name.to_sym.inspect %>,
<%- end -%>
- ].freeze
+ ].freeze #: Array[Symbol?]
private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION
private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES
diff --git a/prism/templates/lib/prism/visitor.rb.erb b/prism/templates/lib/prism/visitor.rb.erb
index 4b30a1815b..f23e87d99e 100644
--- a/prism/templates/lib/prism/visitor.rb.erb
+++ b/prism/templates/lib/prism/visitor.rb.erb
@@ -1,4 +1,14 @@
+#--
+# rbs_inline: enabled
+
module Prism
+ # @rbs!
+ # interface _Visitor
+ # <% nodes.each do |node| %>
+ # def visit_<%= node.human %>: (<%= node.name %>) -> void
+ # <% end %>
+ # end
+
# A class that knows how to walk down the tree. None of the individual visit
# methods are implemented on this visitor, so it forces the consumer to
# implement each one that they need. For a default implementation that
@@ -6,21 +16,27 @@ module Prism
class BasicVisitor
# Calls `accept` on the given node if it is not `nil`, which in turn should
# call back into this visitor by calling the appropriate `visit_*` method.
+ #--
+ #: (node? node) -> void
def visit(node)
# @type self: _Visitor
node&.accept(self)
end
# Visits each node in `nodes` by calling `accept` on each one.
+ #--
+ #: (Array[node?] nodes) -> void
def visit_all(nodes)
# @type self: _Visitor
nodes.each { |node| node&.accept(self) }
end
# Visits the child nodes of `node` by calling `accept` on each one.
+ #--
+ #: (node node) -> void
def visit_child_nodes(node)
# @type self: _Visitor
- node.compact_child_nodes.each { |node| node.accept(self) }
+ node.each_child_node { |node| node.accept(self) }
end
end
@@ -34,7 +50,7 @@ module Prism
#
# class FooCalls < Prism::Visitor
# def visit_call_node(node)
- # if node.name == "foo"
+ # if node.name == :foo
# # Do something with the node
# end
#
@@ -47,7 +63,11 @@ module Prism
<%- nodes.each_with_index do |node, index| -%>
<%= "\n" if index != 0 -%>
# Visit a <%= node.name %> node
- alias visit_<%= node.human %> visit_child_nodes
+ #--
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node)
+ node.each_child_node { |node| node.accept(self) }
+ end
<%- end -%>
end
end
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
index ce98dc5acd..0dea732869 100644
--- a/prism/templates/src/diagnostic.c.erb
+++ b/prism/templates/src/diagnostic.c.erb
@@ -1,4 +1,16 @@
-#include "prism/diagnostic.h"
+#include "prism/internal/diagnostic.h"
+
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+#include "prism/internal/list.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
#define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %>
@@ -75,16 +87,16 @@ typedef struct {
* * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
*/
static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
- // Special error that can be replaced
+ /* Special error that can be replaced */
[PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_SYNTAX },
- // Errors that should raise argument errors
+ /* Errors that should raise argument errors */
[PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
- // Errors that should raise load errors
+ /* Errors that should raise load errors */
[PM_ERR_SCRIPT_NOT_FOUND] = { "no Ruby script found in input", PM_ERROR_LEVEL_LOAD },
- // Errors that should raise syntax errors
+ /* Errors that should raise syntax errors */
[PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE] = { "invalid argument being passed to `alias`; can't make alias for the number variables", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
@@ -102,6 +114,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_ARGUMENT_FORWARDING_UNBOUND] = { "unexpected `...` in an non-parenthesized call", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND] = { "unexpected `&`; no anonymous block parameter", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = { "unexpected ... when the parent method is not forwarding", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA] = { "unexpected ... in lambda argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK] = { "unexpected ... in block argument", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = { "unexpected `*`; no anonymous rest parameter", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR] = { "unexpected `**`; no anonymous keyword rest parameter", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = { "unexpected `*` splat argument after a `**` keyword splat argument", PM_ERROR_LEVEL_SYNTAX },
@@ -144,7 +158,9 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_CONDITIONAL_WHILE_PREDICATE] = { "expected a predicate expression for the `while` statement", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = { "expected a constant after the `::` operator", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEF_ENDLESS] = { "could not parse the endless method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_PARAMETERS] = { "could not parse the endless method parameters", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEF_ENDLESS_SETTER] = { "invalid method name; a setter method cannot be defined in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_DO_BLOCK] = { "unexpected `do` for block in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEF_NAME] = { "unexpected %s; expected a method name", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEF_PARAMS_TERM] = { "expected a delimiter to close the parameters", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEF_PARAMS_TERM_PAREN] = { "unexpected %s; expected a `)` to close the parameters", PM_ERROR_LEVEL_SYNTAX },
@@ -184,6 +200,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_EXPECT_FOR_DELIMITER] = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_IN_DELIMITER] = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN] = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_MESSAGE] = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EXPECT_RBRACKET] = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX },
@@ -298,6 +316,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_PARAMETER_UNEXPECTED_NO_KW] = { "unexpected **nil; no keywords marker disallowed after keywords", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS] = { "unexpected multiple '*' rest patterns in an array pattern", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_CAPTURE_DUPLICATE] = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE] = { "variable capture in alternative pattern", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX },
@@ -323,13 +342,15 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_PATTERN_TERM_PAREN] = { "expected a `)` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = { "unexpected `||=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH] = { "regexp encoding option '%c' differs from source encoding '%s'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8] = { "escaped non ASCII character in UTF-8 regexp: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING] = { "incompatible character encoding: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
- [PM_ERR_REGEXP_NON_ESCAPED_MBC] = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_INVALID_CHAR_PROPERTY] = { "invalid character property name {%.*s}: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_REGEXP_INVALID_UNICODE_RANGE] = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_NON_ESCAPED_MBC] = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_REGEXP_PARSE_ERROR] = { "%s", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_REGEXP_UNKNOWN_OPTIONS] = { "unknown regexp %s - %.*s", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_REGEXP_TERM] = { "unterminated regexp meets end of file; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX },
- [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP] = { "UTF-8 character in non UTF-8 regexp: /%s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP] = { "UTF-8 character in non UTF-8 regexp: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_RESCUE_EXPRESSION] = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_RESCUE_MODIFIER_VALUE] = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_RESCUE_TERM] = { "expected a closing delimiter for the `rescue` clause", PM_ERROR_LEVEL_SYNTAX },
@@ -344,7 +365,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_STRING_INTERPOLATED_TERM] = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
- [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, // TODO expected symbol? prism.c ~9719
+ [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, /* TODO expected symbol? prism.c ~9719 */
[PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
@@ -358,6 +379,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_UNEXPECTED_INDEX_KEYWORDS] = { "unexpected keyword arg given in index assignment; keywords are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_UNEXPECTED_LABEL] = { "unexpected label", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_UNEXPECTED_MULTI_WRITE] = { "unexpected multiple assignment; multiple assignment is not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE] = { "unexpected %s; expected a default value for a parameter", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_UNEXPECTED_RANGE_OPERATOR] = { "unexpected range operator; .. and ... are non-associative and cannot be chained", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_UNEXPECTED_SAFE_NAVIGATION] = { "&. inside multiple assignment destination", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_SYNTAX },
@@ -370,7 +392,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_WRITE_TARGET_UNEXPECTED] = { "unexpected write target", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_XSTRING_TERM] = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_SYNTAX },
- // Warnings
+ /* Warnings */
[PM_WARN_AMBIGUOUS_BINARY_OPERATOR] = { "'%s' after local variable or literal is interpreted as binary operator even though it seems like %s", PM_WARNING_LEVEL_VERBOSE },
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = { "ambiguous first argument; put parentheses or a space even after `-` operator", PM_WARNING_LEVEL_VERBOSE },
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
@@ -406,8 +428,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
/**
* Get the human-readable name of the given diagnostic ID.
*/
-const char *
-pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) {
+static const char *
+pm_diagnostic_id_name(pm_diagnostic_id_t diag_id) {
switch (diag_id) {
<%- errors.each do |error| -%>
case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>";
@@ -421,8 +443,8 @@ pm_diagnostic_id_human(pm_diagnostic_id_t diag_id) {
return "";
}
-static inline const char *
-pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
+static PRISM_INLINE const char *
+pm_diagnostic_id_message(pm_diagnostic_id_t diag_id) {
assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
const char *message = diagnostic_messages[diag_id].message;
@@ -431,91 +453,102 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
return message;
}
-static inline uint8_t
-pm_diagnostic_level(pm_diagnostic_id_t diag_id) {
+static PRISM_INLINE uint8_t
+pm_diagnostic_id_level(pm_diagnostic_id_t diag_id) {
assert(diag_id < PM_DIAGNOSTIC_ID_MAX); /* the ID indexes diagnostic_messages, which is declared with PM_DIAGNOSTIC_ID_MAX entries */
return (uint8_t) diagnostic_messages[diag_id].level; /* level recorded in the table entry for this ID, narrowed to a byte */
}
/**
+ * Get the type of the given diagnostic.
+ *
+ * The type is the string that pm_diagnostic_id_name() returns for the
+ * diagnostic's diag_id.
+ *
+ * @param diagnostic The diagnostic to inspect. It is dereferenced without a
+ *     NULL check.
+ * @return The name string for the diagnostic's ID.
+ */
+const char *
+pm_diagnostic_type(const pm_diagnostic_t *diagnostic) {
+ return pm_diagnostic_id_name(diagnostic->diag_id);
+}
+
+/**
+ * Get the location of the given diagnostic.
+ *
+ * @param diagnostic The diagnostic to inspect. It is dereferenced without a
+ *     NULL check.
+ * @return The diagnostic's location field, returned by value.
+ */
+pm_location_t
+pm_diagnostic_location(const pm_diagnostic_t *diagnostic) {
+ return diagnostic->location;
+}
+
+/**
+ * Get the message of the given diagnostic.
+ *
+ * This returns the pointer stored in the diagnostic. The append functions in
+ * this file set it either to the message looked up for the diagnostic ID or
+ * to a formatted string allocated from the arena.
+ *
+ * @param diagnostic The diagnostic to inspect. It is dereferenced without a
+ *     NULL check.
+ * @return The stored message pointer.
+ */
+const char *
+pm_diagnostic_message(const pm_diagnostic_t *diagnostic) {
+ return diagnostic->message;
+}
+
+/**
+ * Get the error level associated with the given diagnostic.
+ *
+ * The value is the level that pm_diagnostic_id_level() reports for the
+ * diagnostic's ID, cast to pm_error_level_t.
+ *
+ * NOTE(review): nothing here checks that the ID belongs to an error rather
+ * than a warning; presumably callers only use this on errors - confirm.
+ *
+ * @param diagnostic The diagnostic to inspect. It is dereferenced without a
+ *     NULL check.
+ * @return The level for the diagnostic's ID as a pm_error_level_t.
+ */
+pm_error_level_t
+pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) {
+ return (pm_error_level_t) pm_diagnostic_id_level(diagnostic->diag_id);
+}
+
+/**
+ * Get the warning level associated with the given diagnostic.
+ *
+ * The value is the level that pm_diagnostic_id_level() reports for the
+ * diagnostic's ID, cast to pm_warning_level_t.
+ *
+ * NOTE(review): nothing here checks that the ID belongs to a warning rather
+ * than an error; presumably callers only use this on warnings - confirm.
+ *
+ * @param diagnostic The diagnostic to inspect. It is dereferenced without a
+ *     NULL check.
+ * @return The level for the diagnostic's ID as a pm_warning_level_t.
+ */
+pm_warning_level_t
+pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) {
+ return (pm_warning_level_t) pm_diagnostic_id_level(diagnostic->diag_id);
+}
+
+/**
* Append an error to the given list of diagnostic.
+ *
+ * The diagnostic is allocated zeroed from the arena and then filled in with
+ * the location (start offset and length), the ID, and the message and level
+ * looked up for that ID, before being passed to pm_list_append.
+ *
+ * NOTE(review): the result of pm_arena_zalloc is used without a NULL check;
+ * presumably the arena never returns NULL - confirm.
+ *
+ * @param arena The arena the diagnostic is allocated from.
+ * @param list The list the diagnostic is appended to.
+ * @param start The value stored in the location's start field.
+ * @param length The value stored in the location's length field.
+ * @param diag_id The ID used to look up the message and level.
*/
-bool
-pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
- if (diagnostic == NULL) return false;
+void
+pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+ pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
*diagnostic = (pm_diagnostic_t) {
- .location = { start, end },
+ .location = { .start = start, .length = length },
.diag_id = diag_id,
- .message = pm_diagnostic_message(diag_id),
- .owned = false,
- .level = pm_diagnostic_level(diag_id)
+ .message = pm_diagnostic_id_message(diag_id),
+ .level = pm_diagnostic_id_level(diag_id)
};
pm_list_append(list, (pm_list_node_t *) diagnostic);
- return true;
}
/**
* Append a diagnostic to the given list of diagnostics that is using a format
* string for its message.
+ *
+ * The message looked up for diag_id is used as a printf-style format. It is
+ * formatted twice with vsnprintf: once with a NULL buffer to measure the
+ * result, and once into a buffer of that size plus one byte that is allocated
+ * from the arena. The variadic arguments must match the conversions in that
+ * format.
+ *
+ * If the measuring call returns a negative value, the function returns
+ * without appending anything.
+ *
+ * NOTE(review): the results of pm_arena_zalloc and pm_arena_alloc are used
+ * without a NULL check; presumably the arena never returns NULL - confirm.
+ *
+ * @param arena The arena the diagnostic and its message are allocated from.
+ * @param list The list the diagnostic is appended to.
+ * @param start The value stored in the location's start field.
+ * @param length The value stored in the location's length field.
+ * @param diag_id The ID used to look up the format string and level.
+ * @param ... The arguments consumed by the format string.
*/
-bool
-pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id, ...) {
+void
+pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) {
va_list arguments;
va_start(arguments, diag_id);
- const char *format = pm_diagnostic_message(diag_id);
+ const char *format = pm_diagnostic_id_message(diag_id);
int result = vsnprintf(NULL, 0, format, arguments);
va_end(arguments);
if (result < 0) {
- return false;
+ return;
}
- pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) xcalloc(1, sizeof(pm_diagnostic_t));
- if (diagnostic == NULL) {
- return false;
- }
+ pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
- size_t length = (size_t) (result + 1);
- char *message = (char *) xmalloc(length);
- if (message == NULL) {
- xfree(diagnostic);
- return false;
- }
+ size_t message_length = (size_t) (result + 1);
+ char *message = (char *) pm_arena_alloc(arena, message_length, 1);
va_start(arguments, diag_id);
- vsnprintf(message, length, format, arguments);
+ vsnprintf(message, message_length, format, arguments);
va_end(arguments);
*diagnostic = (pm_diagnostic_t) {
- .location = { start, end },
+ .location = { .start = start, .length = length },
.diag_id = diag_id,
.message = message,
- .owned = true,
- .level = pm_diagnostic_level(diag_id)
+ .level = pm_diagnostic_id_level(diag_id)
};
pm_list_append(list, (pm_list_node_t *) diagnostic);
- return true;
-}
-
-/**
- * Deallocate the internal state of the given diagnostic list.
- */
-void
-pm_diagnostic_list_free(pm_list_t *list) {
- pm_diagnostic_t *node = (pm_diagnostic_t *) list->head;
-
- while (node != NULL) {
- pm_diagnostic_t *next = (pm_diagnostic_t *) node->node.next;
-
- if (node->owned) xfree((void *) node->message);
- xfree(node);
-
- node = next;
- }
}
diff --git a/prism/templates/src/json.c.erb b/prism/templates/src/json.c.erb
new file mode 100644
index 0000000000..5c4ab8d92a
--- /dev/null
+++ b/prism/templates/src/json.c.erb
@@ -0,0 +1,130 @@
+#include "prism/json.h"
+
+// Ensure this translation unit is never empty, even when JSON is excluded.
+typedef int pm_json_unused_t;
+
+#ifndef PRISM_EXCLUDE_JSON
+
+#include "prism/internal/buffer.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/parser.h"
+
+#include <inttypes.h>
+
+static void
+pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
+ const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id); // look up the constant for this ID in the parser's constant pool
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON); // the constant's bytes, appended with the JSON escaping mode, between the two quote bytes
+ pm_buffer_append_byte(buffer, '"');
+}
+
+static void
+pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) {
+ pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length); // writes an object with "start" and "length" keys taken directly from the location's fields
+}
+
+/**
+ * Dump JSON to the given buffer.
+ *
+ * Each node is written as an object that starts with "type" and "location"
+ * keys, followed by one key per flags/field entry of the node in template
+ * order. Flags are written under the "flags" key as an array holding the
+ * names of the flags that are set. Child nodes are dumped recursively, lists
+ * are written as arrays, and optional nodes, constants and locations that
+ * are unset are written as null. An optional location counts as unset when
+ * its length is zero.
+ *
+ * PM_SCOPE_NODE writes nothing.
+ *
+ * @param buffer The buffer the JSON text is appended to.
+ * @param parser The parser whose constant pool is used to resolve constants.
+ * @param node The node to dump. It is dereferenced without a NULL check.
+ */
+void
+pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ <%- nodes.each do |node| -%>
+ case <%= node.type %>: {
+ pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
+
+ const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+ pm_dump_json_location(buffer, &cast->base.location);
+ <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
+
+ // Dump the <%= field.name %> field
+ pm_buffer_append_byte(buffer, ',');
+ <%- if field.is_a?(Prism::Template::Flags) -%>
+ pm_buffer_append_string(buffer, "\"flags\":", 8);
+ <%- else -%>
+ pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
+ <%- end -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (cast-><%= field.name %> != NULL) {
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::NodeListField -%>
+ const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::StringField -%>
+ const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
+ pm_buffer_append_byte(buffer, '"');
+ <%- when Prism::Template::ConstantField -%>
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalConstantField -%>
+ if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::ConstantListField -%>
+ const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::LocationField -%>
+ pm_dump_json_location(buffer, &cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalLocationField -%>
+ if (cast-><%= field.name %>.length != 0) {
+ pm_dump_json_location(buffer, &cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::UInt8Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
+ <%- when Prism::Template::Flags -%>
+ size_t flags = 0;
+ pm_buffer_append_byte(buffer, '[');
+ <%- node.flags.values.each_with_index do |value, index| -%>
+ if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
+ if (flags != 0) pm_buffer_append_byte(buffer, ',');
+ pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
+ flags++;
+ }
+ <%- end -%>
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::IntegerField -%>
+ pm_integer_string(buffer, &cast-><%= field.name %>);
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
+ <%- else -%>
+ <%- raise %>
+ <%- end -%>
+ <%- end -%>
+
+ pm_buffer_append_byte(buffer, '}');
+ break;
+ }
+ <%- end -%>
+ case PM_SCOPE_NODE:
+ break;
+ }
+}
+
+#endif
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index 2357e55200..f51aff6e53 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -1,153 +1,85 @@
#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
-#include "prism/node.h"
+#include "prism/internal/node.h"
+
+#include "prism/internal/arena.h"
+
+#include <stdlib.h>
/**
* Attempts to grow the node list to the next size. If there is already
- * capacity in the list, this function does nothing. Otherwise it reallocates
- * the list to be twice as large as it was before. If the reallocation fails,
- * this function returns false, otherwise it returns true.
+ * capacity in the list, this function does nothing. Otherwise it allocates a
+ * new array from the arena (abandon-and-copy strategy) and copies the existing
+ * data into it.
+ *
+ * The new capacity is 4 for a list with no capacity, otherwise twice the
+ * current capacity, doubled again until it is at least list->size + size.
+ * The function calls abort() if that sum wraps around, or if the capacity
+ * has wrapped to zero while still being too small.
+ *
+ * NOTE(review): the byte count sizeof(pm_node_t *) * next_capacity is not
+ * itself checked for overflow, and memcpy needs <string.h>, which is
+ * presumably pulled in by one of the project headers - confirm both.
+ *
+ * @param arena The arena the new array is allocated from.
+ * @param list The list to grow; its nodes and capacity fields are updated.
+ * @param size The number of additional entries the list must be able to hold.
*/
-static bool
-pm_node_list_grow(pm_node_list_t *list, size_t size) {
+static void
+pm_node_list_grow(pm_arena_t *arena, pm_node_list_t *list, size_t size) {
size_t requested_size = list->size + size;
- // If the requested size caused overflow, return false.
- if (requested_size < list->size) return false;
+ // Guard against overflow on the addition.
+ if (requested_size < list->size) abort();
- // If the requested size is within the existing capacity, return true.
- if (requested_size < list->capacity) return true;
+ // If the requested size is within the existing capacity, return.
+ if (requested_size <= list->capacity) return;
- // Otherwise, reallocate the list to be twice as large as it was before.
+ // Otherwise, compute the next capacity by doubling.
size_t next_capacity = list->capacity == 0 ? 4 : list->capacity * 2;
- // If multiplying by 2 caused overflow, return false.
- if (next_capacity < list->capacity) return false;
-
- // If we didn't get enough by doubling, keep doubling until we do.
+ // Guard against overflow on the doubling.
while (requested_size > next_capacity) {
- size_t double_capacity = next_capacity * 2;
-
- // Ensure we didn't overflow by multiplying by 2.
- if (double_capacity < next_capacity) return false;
- next_capacity = double_capacity;
+ if (next_capacity == 0) abort();
+ next_capacity *= 2;
}
- pm_node_t **nodes = (pm_node_t **) xrealloc(list->nodes, sizeof(pm_node_t *) * next_capacity);
- if (nodes == NULL) return false;
+ // Allocate a new array from the arena (old array is abandoned).
+ pm_node_t **nodes = (pm_node_t **) pm_arena_alloc(arena, sizeof(pm_node_t *) * next_capacity, PRISM_ALIGNOF(pm_node_t *));
+
+ // Copy old data into the new array.
+ if (list->size > 0) {
+ memcpy(nodes, list->nodes, list->size * sizeof(pm_node_t *));
+ }
list->nodes = nodes;
list->capacity = next_capacity;
- return true;
}
/**
- * Append a new node onto the end of the node list.
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly - use pm_node_list_append instead.
+ *
+ * The list is grown by one entry through pm_node_list_grow, then the node is
+ * stored at index list->size and the size is incremented.
+ *
+ * @param arena The arena passed on to pm_node_list_grow.
+ * @param list The list to append to.
+ * @param node The node pointer to store.
*/
void
-pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
- if (pm_node_list_grow(list, 1)) {
- list->nodes[list->size++] = node;
- }
+pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+ pm_node_list_grow(arena, list, 1);
+ list->nodes[list->size++] = node;
}
/**
* Prepend a new node onto the beginning of the node list.
+ *
+ * The list is grown by one entry through pm_node_list_grow, the existing
+ * entries are shifted up by one slot with memmove (the ranges overlap), and
+ * the node is stored at index 0.
+ *
+ * @param arena The arena passed on to pm_node_list_grow.
+ * @param list The list to prepend to.
+ * @param node The node pointer to store at the front.
*/
void
-pm_node_list_prepend(pm_node_list_t *list, pm_node_t *node) {
- if (pm_node_list_grow(list, 1)) {
- memmove(list->nodes + 1, list->nodes, list->size * sizeof(pm_node_t *));
- list->nodes[0] = node;
- list->size++;
- }
+pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+ pm_node_list_grow(arena, list, 1);
+ memmove(list->nodes + 1, list->nodes, list->size * sizeof(pm_node_t *));
+ list->nodes[0] = node;
+ list->size++;
}
/**
* Concatenate the given node list onto the end of the other node list.
+ *
+ * When other is empty nothing happens. Otherwise list is grown by
+ * other->size entries through pm_node_list_grow and the node pointers of
+ * other are copied after the existing entries of list. The other list is
+ * only read.
+ *
+ * @param arena The arena passed on to pm_node_list_grow.
+ * @param list The list that receives the entries.
+ * @param other The list whose entries are copied.
*/
void
-pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other) {
- if (other->size > 0 && pm_node_list_grow(list, other->size)) {
+pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other) {
+ if (other->size > 0) {
+ pm_node_list_grow(arena, list, other->size);
memcpy(list->nodes + list->size, other->nodes, other->size * sizeof(pm_node_t *));
list->size += other->size;
}
}
/**
- * Free the internal memory associated with the given node list.
- */
-void
-pm_node_list_free(pm_node_list_t *list) {
- if (list->capacity > 0) {
- xfree(list->nodes);
- *list = (pm_node_list_t) { 0 };
- }
-}
-
-PRISM_EXPORTED_FUNCTION void
-pm_node_destroy(pm_parser_t *parser, pm_node_t *node);
-
-/**
- * Destroy the nodes that are contained within the given node list.
- */
-static void
-pm_node_list_destroy(pm_parser_t *parser, pm_node_list_t *list) {
- pm_node_t *node;
- PM_NODE_LIST_FOREACH(list, index, node) pm_node_destroy(parser, node);
- pm_node_list_free(list);
-}
-
-/**
- * Deallocate the space for a pm_node_t. Similarly to pm_node_alloc, we're not
- * using the parser argument, but it's there to allow for the future possibility
- * of pre-allocating larger memory pools.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
- switch (PM_NODE_TYPE(node)) {
- <%- nodes.each do |node| -%>
-#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
- case <%= node.type %>: {
- <%- if node.fields.any? { |field| ![Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField].include?(field.class) } -%>
- pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
- <%- end -%>
- <%- node.fields.each do |field| -%>
- <%- case field -%>
- <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::DoubleField -%>
- <%- when Prism::Template::NodeField -%>
- pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
- <%- when Prism::Template::OptionalNodeField -%>
- if (cast-><%= field.name %> != NULL) {
- pm_node_destroy(parser, (pm_node_t *)cast-><%= field.name %>);
- }
- <%- when Prism::Template::StringField -%>
- pm_string_free(&cast-><%= field.name %>);
- <%- when Prism::Template::NodeListField -%>
- pm_node_list_destroy(parser, &cast-><%= field.name %>);
- <%- when Prism::Template::ConstantListField -%>
- pm_constant_id_list_free(&cast-><%= field.name %>);
- <%- when Prism::Template::IntegerField -%>
- pm_integer_free(&cast-><%= field.name %>);
- <%- else -%>
- <%- raise -%>
- <%- end -%>
- <%- end -%>
- break;
- }
- <%- end -%>
-#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
- default:
- assert(false && "unreachable");
- break;
- }
- xfree(node);
-}
-
-/**
* Returns a string representation of the given node type.
*/
-PRISM_EXPORTED_FUNCTION const char *
-pm_node_type_to_str(pm_node_type_t node_type)
+const char *
+pm_node_type(pm_node_type_t node_type)
{
switch (node_type) {
<%- nodes.each do |node| -%>
@@ -166,7 +98,7 @@ pm_node_type_to_str(pm_node_type_t node_type)
* pointer and is passed to the visitor callback for consumers to use as they
* see fit.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
if (visitor(node, data)) pm_visit_child_nodes(node, visitor, data); // the callback runs on this node first; children are walked only when it returns true
}
@@ -176,7 +108,7 @@ pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void
* default behavior for walking the tree that is called from pm_visit_node if
* the callback returns true.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
@@ -212,122 +144,23 @@ pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *nod
break;
}
}
+<%- nodes.each do |node| -%>
-// We optionally support dumping to JSON. For systems that don't want or need
-// this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
-#ifndef PRISM_EXCLUDE_JSON
-
-static void
-pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
- const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
- pm_buffer_append_byte(buffer, '"');
- pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON);
- pm_buffer_append_byte(buffer, '"');
-}
-
-static void
-pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) {
- uint32_t start = (uint32_t) (location->start - parser->start);
- uint32_t end = (uint32_t) (location->end - parser->start);
- pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start, end);
-}
-
+<%- params = node.fields.map(&:c_param) -%>
/**
- * Dump JSON to the given buffer.
+ * Allocate and initialize a new <%= node.name %> node.
*/
-PRISM_EXPORTED_FUNCTION void
-pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
- switch (PM_NODE_TYPE(node)) {
- <%- nodes.each do |node| -%>
- case <%= node.type %>: {
- pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
-
- const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
- pm_dump_json_location(buffer, parser, &cast->base.location);
- <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
-
- // Dump the <%= field.name %> field
- pm_buffer_append_byte(buffer, ',');
- pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
- <%- case field -%>
- <%- when Prism::Template::NodeField -%>
- pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
- <%- when Prism::Template::OptionalNodeField -%>
- if (cast-><%= field.name %> != NULL) {
- pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
- } else {
- pm_buffer_append_string(buffer, "null", 4);
- }
- <%- when Prism::Template::NodeListField -%>
- const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
- pm_buffer_append_byte(buffer, '[');
-
- for (size_t index = 0; index < <%= field.name %>->size; index++) {
- if (index != 0) pm_buffer_append_byte(buffer, ',');
- pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
- }
- pm_buffer_append_byte(buffer, ']');
- <%- when Prism::Template::StringField -%>
- const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
- pm_buffer_append_byte(buffer, '"');
- pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
- pm_buffer_append_byte(buffer, '"');
- <%- when Prism::Template::ConstantField -%>
- pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
- <%- when Prism::Template::OptionalConstantField -%>
- if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
- pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
- } else {
- pm_buffer_append_string(buffer, "null", 4);
- }
- <%- when Prism::Template::ConstantListField -%>
- const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
- pm_buffer_append_byte(buffer, '[');
-
- for (size_t index = 0; index < <%= field.name %>->size; index++) {
- if (index != 0) pm_buffer_append_byte(buffer, ',');
- pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
- }
- pm_buffer_append_byte(buffer, ']');
- <%- when Prism::Template::LocationField -%>
- pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
- <%- when Prism::Template::OptionalLocationField -%>
- if (cast-><%= field.name %>.start != NULL) {
- pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
- } else {
- pm_buffer_append_string(buffer, "null", 4);
- }
- <%- when Prism::Template::UInt8Field -%>
- pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
- <%- when Prism::Template::UInt32Field -%>
- pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
- <%- when Prism::Template::Flags -%>
- size_t flags = 0;
- pm_buffer_append_byte(buffer, '[');
- <%- node.flags.values.each_with_index do |value, index| -%>
- if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
- if (flags != 0) pm_buffer_append_byte(buffer, ',');
- pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
- flags++;
- }
- <%- end -%>
- pm_buffer_append_byte(buffer, ']');
- <%- when Prism::Template::IntegerField -%>
- pm_integer_string(buffer, &cast-><%= field.name %>);
- <%- when Prism::Template::DoubleField -%>
- pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
- <%- else -%>
- <%- raise %>
- <%- end -%>
- <%- end -%>
+pm_<%= node.human %>_t *
+pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) {
+ pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t)); // storage comes from the arena; the result is used without a NULL check
+
+ *node = (pm_<%= node.human %>_t) {
+ .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %> // common header shared by every node type
+<%- node.fields.each_with_index do |field, index| -%>
+ .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %> // each field is initialized from an identifier with the field's own name (presumably the matching c_param parameter - confirm)
+<%- end -%>
+ };
- pm_buffer_append_byte(buffer, '}');
- break;
- }
- <%- end -%>
- case PM_SCOPE_NODE:
- break;
- }
+ return node;
}
-
-#endif
+<%- end -%>
diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb
index 639c2fecf3..f12531d934 100644
--- a/prism/templates/src/prettyprint.c.erb
+++ b/prism/templates/src/prettyprint.c.erb
@@ -1,23 +1,34 @@
<%# encoding: ASCII -%>
#include "prism/prettyprint.h"
-// We optionally support pretty printing nodes. For systems that don't want or
-// need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_PRETTYPRINT define.
+/* We optionally support pretty printing nodes. For systems that don't want or
+ * need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_PRETTYPRINT define. */
#ifdef PRISM_EXCLUDE_PRETTYPRINT
-void pm_prettyprint(void) {}
+/* Ensure this translation unit is never empty, even when prettyprint is
+ * excluded. */
+typedef int pm_prettyprint_unused_t;
#else
-static inline void
+#include "prism/compiler/inline.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/parser.h"
+#include "prism/line_offset_list.h"
+
+#include <inttypes.h>
+
+static PRISM_INLINE void
prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
- pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
- pm_line_column_t end = pm_newline_list_line_column(&parser->newline_list, location->end, parser->start_line);
+ pm_line_column_t start = pm_line_offset_list_line_column(&parser->line_offsets, location->start, parser->start_line);
+ pm_line_column_t end = pm_line_offset_list_line_column(&parser->line_offsets, location->start + location->length, parser->start_line);
pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column);
}
-static inline void
+static PRISM_INLINE void
prettyprint_constant(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_constant_id_t constant_id) {
pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
pm_buffer_append_format(output_buffer, ":%.*s", (int) constant->length, constant->start);
@@ -106,17 +117,17 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
- pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
<%- when Prism::Template::OptionalLocationField -%>
pm_location_t *location = &cast-><%= field.name %>;
- if (location->start == NULL) {
+ if (location->length == 0) {
pm_buffer_append_string(output_buffer, " nil\n", 5);
} else {
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
- pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
}
<%- when Prism::Template::UInt8Field -%>
@@ -156,11 +167,11 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
/**
* Pretty-prints the AST represented by the given node to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) {
pm_buffer_t prefix_buffer = { 0 };
prettyprint_node(output_buffer, parser, node, &prefix_buffer);
- pm_buffer_free(&prefix_buffer);
+ pm_buffer_cleanup(&prefix_buffer);
}
#endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index 3e15a11039..3d9811e5db 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -1,57 +1,58 @@
-#include "prism.h"
+#include "prism/excludes.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifdef PRISM_EXCLUDE_SERIALIZATION
+
+/* Ensure this translation unit is never empty, even when serialization is
+ * excluded. */
+typedef int pm_serialize_unused_t;
+
+#else
+
+#include "prism/compiler/inline.h"
-// We optionally support serializing to a binary string. For systems that don't
-// want or need this functionality, it can be turned off with the
-// PRISM_EXCLUDE_SERIALIZATION define.
-#ifndef PRISM_EXCLUDE_SERIALIZATION
+#include "prism/internal/buffer.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+#include "prism/internal/magic_comments.h"
+#include "prism/internal/options.h"
+#include "prism/internal/parser.h"
+#include "prism.h"
+#include "prism/ast.h"
+#include "prism/line_offset_list.h"
+
+#include <assert.h>
#include <stdio.h>
+#include <string.h>
-static inline uint32_t
+static PRISM_INLINE uint32_t
pm_ptrdifft_to_u32(ptrdiff_t value) {
assert(value >= 0 && ((unsigned long) value) < UINT32_MAX);
return (uint32_t) value;
}
-static inline uint32_t
+static PRISM_INLINE uint32_t
pm_sizet_to_u32(size_t value) {
assert(value < UINT32_MAX);
return (uint32_t) value;
}
static void
-pm_serialize_location(const pm_parser_t *parser, const pm_location_t *location, pm_buffer_t *buffer) {
- assert(location->start);
- assert(location->end);
- assert(location->start <= location->end);
-
- pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->start - parser->start));
- pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(location->end - location->start));
+pm_serialize_location(const pm_location_t *location, pm_buffer_t *buffer) {
+ pm_buffer_append_varuint(buffer, location->start);
+ pm_buffer_append_varuint(buffer, location->length);
}
static void
-pm_serialize_string(const pm_parser_t *parser, const pm_string_t *string, pm_buffer_t *buffer) {
- switch (string->type) {
- case PM_STRING_SHARED: {
- pm_buffer_append_byte(buffer, 1);
- pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(pm_string_source(string) - parser->start));
- pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_string_length(string)));
- break;
- }
- case PM_STRING_OWNED:
- case PM_STRING_CONSTANT: {
- uint32_t length = pm_sizet_to_u32(pm_string_length(string));
- pm_buffer_append_byte(buffer, 2);
- pm_buffer_append_varuint(buffer, length);
- pm_buffer_append_bytes(buffer, pm_string_source(string), length);
- break;
- }
-#ifdef PRISM_HAS_MMAP
- case PM_STRING_MAPPED:
- assert(false && "Cannot serialize mapped strings.");
- break;
-#endif
- }
+pm_serialize_string(const pm_string_t *string, pm_buffer_t *buffer) {
+ uint32_t length = pm_sizet_to_u32(pm_string_length(string));
+ pm_buffer_append_varuint(buffer, length);
+ pm_buffer_append_bytes(buffer, pm_string_source(string), length);
}
static void
@@ -72,12 +73,10 @@ static void
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
- size_t offset = buffer->length;
-
<%- if Prism::Template::INCLUDE_NODE_ID -%>
pm_buffer_append_varuint(buffer, node->node_id);
<%- end -%>
- pm_serialize_location(parser, &node->location, buffer);
+ pm_serialize_location(&node->location, buffer);
switch (PM_NODE_TYPE(node)) {
// We do not need to serialize a ScopeNode ever as
@@ -106,7 +105,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
<%- when Prism::Template::StringField -%>
- pm_serialize_string(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ pm_serialize_string(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- when Prism::Template::NodeListField -%>
uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
pm_buffer_append_varuint(buffer, <%= field.name %>_size);
@@ -123,15 +122,15 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
}
<%- when Prism::Template::LocationField -%>
<%- if field.should_be_serialized? -%>
- pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- end -%>
<%- when Prism::Template::OptionalLocationField -%>
<%- if field.should_be_serialized? -%>
- if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
+ if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) {
pm_buffer_append_byte(buffer, 0);
} else {
pm_buffer_append_byte(buffer, 1);
- pm_serialize_location(parser, &((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
<%- end -%>
<%- when Prism::Template::UInt8Field -%>
@@ -148,7 +147,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
<%- end -%>
<%- if node.needs_serialized_length? -%>
// serialize length
- uint32_t length = pm_sizet_to_u32(buffer->length - offset - sizeof(uint32_t));
+ uint32_t length = pm_sizet_to_u32(buffer->length - length_offset);
memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
<%- end -%>
break;
@@ -158,7 +157,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
}
static void
-pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_line_offset_list(pm_line_offset_list_t *list, pm_buffer_t *buffer) {
uint32_t size = pm_sizet_to_u32(list->size);
pm_buffer_append_varuint(buffer, size);
@@ -169,60 +168,60 @@ pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
}
static void
-pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
+pm_serialize_comment(pm_comment_t *comment, pm_buffer_t *buffer) {
// serialize type
pm_buffer_append_byte(buffer, (uint8_t) comment->type);
// serialize location
- pm_serialize_location(parser, &comment->location, buffer);
+ pm_serialize_location(&comment->location, buffer);
}
/**
* Serialize the given list of comments to the given buffer.
*/
void
-pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
pm_comment_t *comment;
for (comment = (pm_comment_t *) list->head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
- pm_serialize_comment(parser, comment, buffer);
+ pm_serialize_comment(comment, buffer);
}
}
static void
-pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
+pm_serialize_magic_comment(pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
// serialize key location
- pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
- pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->key_length));
+ pm_buffer_append_varuint(buffer, magic_comment->key.start);
+ pm_buffer_append_varuint(buffer, magic_comment->key.length);
// serialize value location
- pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
- pm_buffer_append_varuint(buffer, pm_sizet_to_u32(magic_comment->value_length));
+ pm_buffer_append_varuint(buffer, magic_comment->value.start);
+ pm_buffer_append_varuint(buffer, magic_comment->value.length);
}
static void
-pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_magic_comment_list(pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
pm_magic_comment_t *magic_comment;
for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
- pm_serialize_magic_comment(parser, magic_comment, buffer);
+ pm_serialize_magic_comment(magic_comment, buffer);
}
}
static void
pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
- if (parser->data_loc.end == NULL) {
+ if (parser->data_loc.length == 0) {
pm_buffer_append_byte(buffer, 0);
} else {
pm_buffer_append_byte(buffer, 1);
- pm_serialize_location(parser, &parser->data_loc, buffer);
+ pm_serialize_location(&parser->data_loc, buffer);
}
}
static void
-pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
+pm_serialize_diagnostic(pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
// serialize the type
pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id);
@@ -232,18 +231,18 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
pm_buffer_append_string(buffer, diagnostic->message, message_length);
// serialize location
- pm_serialize_location(parser, &diagnostic->location, buffer);
+ pm_serialize_location(&diagnostic->location, buffer);
pm_buffer_append_byte(buffer, diagnostic->level);
}
static void
-pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
+pm_serialize_diagnostic_list(pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
pm_diagnostic_t *diagnostic;
for (diagnostic = (pm_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
- pm_serialize_diagnostic(parser, diagnostic, buffer);
+ pm_serialize_diagnostic(diagnostic, buffer);
}
}
@@ -261,14 +260,15 @@ static void
pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
pm_serialize_encoding(parser->encoding, buffer);
pm_buffer_append_varsint(buffer, parser->start_line);
- pm_serialize_newline_list(&parser->newline_list, buffer);
+ pm_serialize_line_offset_list(&parser->line_offsets, buffer);
<%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
- pm_serialize_comment_list(parser, &parser->comment_list, buffer);
+ pm_serialize_comment_list(&parser->comment_list, buffer);
<%- end -%>
- pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
+ pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer);
pm_serialize_data_loc(parser, buffer);
- pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
- pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
+ pm_serialize_diagnostic_list(&parser->error_list, buffer);
+ pm_serialize_diagnostic_list(&parser->warning_list, buffer);
+ pm_buffer_append_byte(buffer, (uint8_t) parser->continuable);
}
#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
@@ -308,28 +308,12 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
pm_constant_t *constant = &parser->constant_pool.constants[bucket->id - 1];
size_t buffer_offset = offset + ((((size_t)bucket->id) - 1) * 8);
- if (bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED || bucket->type == PM_CONSTANT_POOL_BUCKET_CONSTANT) {
- // Since this is an owned or constant constant, we are going to
- // write its contents into the buffer after the constant pool.
- // So effectively in place of the source offset, we have a
- // buffer offset. We will add a leading 1 to indicate that this
- // is a buffer offset.
- uint32_t content_offset = pm_sizet_to_u32(buffer->length);
- uint32_t owned_mask = (uint32_t) (1 << 31);
+ // Write the constant contents into the buffer after the constant
+ // pool. In place of the source offset, we store a buffer offset.
+ uint32_t content_offset = pm_sizet_to_u32(buffer->length);
+ memcpy(buffer->value + buffer_offset, &content_offset, 4);
+ pm_buffer_append_bytes(buffer, constant->start, constant->length);
- assert(content_offset < owned_mask);
- content_offset |= owned_mask;
-
- memcpy(buffer->value + buffer_offset, &content_offset, 4);
- pm_buffer_append_bytes(buffer, constant->start, constant->length);
- } else {
- // Since this is a shared constant, we are going to write its
- // source offset directly into the buffer.
- uint32_t source_offset = pm_ptrdifft_to_u32(constant->start - parser->start);
- memcpy(buffer->value + buffer_offset, &source_offset, 4);
- }
-
- // Now we can write the length of the constant into the buffer.
uint32_t constant_length = pm_sizet_to_u32(constant->length);
memcpy(buffer->value + buffer_offset + 4, &constant_length, 4);
}
@@ -337,7 +321,7 @@ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
}
static void
-serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
+serialize_token(pm_parser_t *parser, pm_token_t *token, void *data) {
pm_buffer_t *buffer = (pm_buffer_t *) data;
pm_buffer_append_varuint(buffer, token->type);
@@ -349,58 +333,72 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
/**
* Lex the given source and serialize to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
pm_options_read(&options, data);
+ pm_arena_t arena = { 0 };
pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
+ pm_parser_init(&arena, &parser, source, size, &options);
- pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
- .data = (void *) buffer,
- .callback = serialize_token,
- };
-
- parser.lex_callback = &lex_callback;
- pm_node_t *node = pm_parse(&parser);
+ pm_parser_lex_callback_set(&parser, serialize_token, buffer);
+ pm_parse(&parser);
// Append 0 to mark end of tokens.
pm_buffer_append_byte(buffer, 0);
pm_serialize_metadata(&parser, buffer);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
}
/**
* Parse and serialize both the AST and the tokens represented by the given
* source to the given buffer.
*/
-PRISM_EXPORTED_FUNCTION void
+void
pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
pm_options_read(&options, data);
+ pm_arena_t arena = { 0 };
pm_parser_t parser;
- pm_parser_init(&parser, source, size, &options);
-
- pm_lex_callback_t lex_callback = (pm_lex_callback_t) {
- .data = (void *) buffer,
- .callback = serialize_token,
- };
+ pm_parser_init(&arena, &parser, source, size, &options);
- parser.lex_callback = &lex_callback;
+ pm_parser_lex_callback_set(&parser, serialize_token, buffer);
pm_node_t *node = pm_parse(&parser);
pm_buffer_append_byte(buffer, 0);
pm_serialize(&parser, node, buffer);
- pm_node_destroy(&parser, node);
- pm_parser_free(&parser);
- pm_options_free(&options);
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
+}
+
+/**
+ * Parse the source and return true if it parses without errors.
+ */
+bool
+pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) {
+ pm_options_t options = { 0 };
+ pm_options_read(&options, data);
+
+ pm_arena_t arena = { 0 };
+ pm_parser_t parser;
+ pm_parser_init(&arena, &parser, source, size, &options);
+
+ pm_parse(&parser);
+
+ bool result = parser.error_list.size == 0;
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
+
+ return result;
}
#endif
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/tokens.c.erb
index f196393ee1..1e82954738 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/tokens.c.erb
@@ -1,12 +1,12 @@
-#include <string.h>
-
#include "prism/ast.h"
+#include <assert.h>
+
/**
* Returns a string representation of the given token type.
*/
-PRISM_EXPORTED_FUNCTION const char *
-pm_token_type_name(pm_token_type_t token_type) {
+const char *
+pm_token_type(pm_token_type_t token_type) {
switch (token_type) {
<%- tokens.each do |token| -%>
case PM_TOKEN_<%= token.name %>:
@@ -27,14 +27,10 @@ pm_token_type_name(pm_token_type_t token_type) {
* Returns the human name of the given token type.
*/
const char *
-pm_token_type_human(pm_token_type_t token_type) {
+pm_token_str(pm_token_type_t token_type) {
switch (token_type) {
case PM_TOKEN_EOF:
return "end-of-input";
- case PM_TOKEN_MISSING:
- return "missing token";
- case PM_TOKEN_NOT_PROVIDED:
- return "not provided token";
case PM_TOKEN_AMPERSAND:
return "'&'";
case PM_TOKEN_AMPERSAND_AMPERSAND:
@@ -171,6 +167,8 @@ pm_token_type_human(pm_token_type_t token_type) {
return "'defined?'";
case PM_TOKEN_KEYWORD_DO:
return "'do'";
+ case PM_TOKEN_KEYWORD_DO_BLOCK:
+ return "'do'";
case PM_TOKEN_KEYWORD_DO_LOOP:
return "'do'";
case PM_TOKEN_KEYWORD_ELSE:
@@ -362,8 +360,8 @@ pm_token_type_human(pm_token_type_t token_type) {
return "";
}
- // Provide a default, because some compilers can't determine that the above
- // switch is exhaustive.
+ /* Provide a default, because some compilers cannot determine that the above
+ * switch is exhaustive. */
assert(false && "unreachable");
return "";
}
diff --git a/prism/templates/template.rb b/prism/templates/template.rb
index 30cb60cabd..0fdeda561f 100755
--- a/prism/templates/template.rb
+++ b/prism/templates/template.rb
@@ -6,13 +6,12 @@ require "fileutils"
require "yaml"
module Prism
- module Template
+ module Template # :nodoc: all
SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
- REMOVE_ON_ERROR_TYPES = SERIALIZE_ONLY_SEMANTICS_FIELDS
CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
- JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "truffleruby"
- JAVA_STRING_TYPE = JAVA_BACKEND == "jruby" ? "org.jruby.RubySymbol" : "String"
+ JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default"
+ JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]"
INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
COMMON_FLAGS_COUNT = 2
@@ -49,6 +48,14 @@ module Prism
end
end
+ # This module contains methods for escaping characters in Doxygen comments.
+ module Doxygen
+ # Similar to \verbatim ... \endverbatim but doesn't wrap the result in a code block.
+ def self.verbatim(value)
+ value.gsub(/[*%!`#<>_+@-]/, '\\\\\0')
+ end
+ end
+
# A comment attached to a field or node.
class ConfigComment
attr_reader :value
@@ -97,6 +104,11 @@ module Prism
# Some node fields can be specialized if they point to a specific kind of
# node and not just a generic node.
class NodeKindField < Field
+ # The C parameter declaration (type and name) to use for this field in a function signature.
+ def c_param
+ "struct #{c_type} *#{name}"
+ end
+
def initialize(kind:, **options)
@kind = kind
super(**options)
@@ -142,27 +154,27 @@ module Prism
if specific_kind
specific_kind
elsif union_kind
- union_kind.join(" | ")
+ "(#{union_kind.join(" | ")})"
else
"Prism::node"
end
end
- def rbi_class
+ def call_seq_type
if specific_kind
- "Prism::#{specific_kind}"
+ specific_kind
elsif union_kind
- "T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})"
+ union_kind.join(" | ")
else
- "Prism::Node"
+ "Node"
end
end
def check_field_kind
if union_kind
- "[#{union_kind.join(', ')}].include?(#{name}.class)"
+ "[#{union_kind.join(', ')}, ErrorRecoveryNode].include?(#{name}.class)"
else
- "#{name}.is_a?(#{ruby_type})"
+ "#{name}.is_a?(#{ruby_type}) || #{name}.is_a?(ErrorRecoveryNode)"
end
end
end
@@ -174,27 +186,27 @@ module Prism
if specific_kind
"#{specific_kind}?"
elsif union_kind
- [*union_kind, "nil"].join(" | ")
+ "(#{union_kind.join(" | ")})?"
else
"Prism::node?"
end
end
- def rbi_class
+ def call_seq_type
if specific_kind
- "T.nilable(Prism::#{specific_kind})"
+ "#{specific_kind} | nil"
elsif union_kind
- "T.nilable(T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")}))"
+ [*union_kind, "nil"].join(" | ")
else
- "T.nilable(Prism::Node)"
+ "Node | nil"
end
end
def check_field_kind
if union_kind
- "[#{union_kind.join(', ')}, NilClass].include?(#{name}.class)"
+ "[#{union_kind.join(', ')}, ErrorRecoveryNode, NilClass].include?(#{name}.class)"
else
- "#{name}.nil? || #{name}.is_a?(#{ruby_type})"
+ "#{name}.nil? || #{name}.is_a?(#{ruby_type}) || #{name}.is_a?(ErrorRecoveryNode)"
end
end
end
@@ -202,23 +214,31 @@ module Prism
# This represents a field on a node that is a list of nodes. We pass them as
# references and store them directly on the struct.
class NodeListField < NodeKindField
- def rbs_class
+ def c_param
+ "pm_node_list_t #{name}"
+ end
+
+ def element_rbs_class
if specific_kind
- "Array[#{specific_kind}]"
+ "#{specific_kind}"
elsif union_kind
- "Array[#{union_kind.join(" | ")}]"
+ "#{union_kind.join(" | ")}"
else
- "Array[Prism::node]"
+ "Prism::node"
end
end
- def rbi_class
+ def rbs_class
+ "Array[#{element_rbs_class}]"
+ end
+
+ def call_seq_type
if specific_kind
- "T::Array[Prism::#{specific_kind}]"
+ "Array[#{specific_kind}]"
elsif union_kind
- "T::Array[T.any(#{union_kind.map { |kind| "Prism::#{kind}" }.join(", ")})]"
+ "Array[#{union_kind.join(" | ")}]"
else
- "T::Array[Prism::Node]"
+ "Array[Node]"
end
end
@@ -228,9 +248,9 @@ module Prism
def check_field_kind
if union_kind
- "#{name}.all? { |n| [#{union_kind.join(', ')}].include?(n.class) }"
+ "#{name}.all? { |n| [#{union_kind.join(', ')}, ErrorRecoveryNode].include?(n.class) }"
else
- "#{name}.all? { |n| n.is_a?(#{ruby_type}) }"
+ "#{name}.all? { |n| n.is_a?(#{ruby_type}) || n.is_a?(ErrorRecoveryNode) }"
end
end
end
@@ -238,58 +258,74 @@ module Prism
# This represents a field on a node that is the ID of a string interned
# through the parser's constant pool.
class ConstantField < Field
+ def c_param
+ "pm_constant_id_t #{name}"
+ end
+
def rbs_class
"Symbol"
end
- def rbi_class
+ def call_seq_type
"Symbol"
end
def java_type
- JAVA_STRING_TYPE
+ JAVA_IDENTIFIER_TYPE
end
end
# This represents a field on a node that is the ID of a string interned
# through the parser's constant pool and can be optionally null.
class OptionalConstantField < Field
+ def c_param
+ "pm_constant_id_t #{name}"
+ end
+
def rbs_class
"Symbol?"
end
- def rbi_class
- "T.nilable(Symbol)"
+ def call_seq_type
+ "Symbol | nil"
end
def java_type
- JAVA_STRING_TYPE
+ JAVA_IDENTIFIER_TYPE
end
end
# This represents a field on a node that is a list of IDs that are associated
# with strings interned through the parser's constant pool.
class ConstantListField < Field
+ def c_param
+ "pm_constant_id_list_t #{name}"
+ end
+
def rbs_class
"Array[Symbol]"
end
- def rbi_class
- "T::Array[Symbol]"
+ def call_seq_type
+ "Array[Symbol]"
end
def java_type
- "#{JAVA_STRING_TYPE}[]"
+ "#{JAVA_IDENTIFIER_TYPE}[]"
end
end
# This represents a field on a node that is a string.
class StringField < Field
+ def c_param
+ "pm_string_t #{name}"
+ end
+
def rbs_class
"String"
end
- def rbi_class
+ def call_seq_type
"String"
end
@@ -300,6 +336,10 @@ module Prism
# This represents a field on a node that is a location.
class LocationField < Field
+ def c_param
+ "pm_location_t #{name}"
+ end
+
def semantic_field?
false
end
@@ -308,8 +348,8 @@ module Prism
"Location"
end
- def rbi_class
- "Prism::Location"
+ def call_seq_type
+ "Location"
end
def java_type
@@ -319,6 +359,10 @@ module Prism
# This represents a field on a node that is a location that is optional.
class OptionalLocationField < Field
+ def c_param
+ "pm_location_t #{name}"
+ end
+
def semantic_field?
false
end
@@ -327,8 +371,8 @@ module Prism
"Location?"
end
- def rbi_class
- "T.nilable(Prism::Location)"
+ def call_seq_type
+ "Location | nil"
end
def java_type
@@ -338,11 +382,15 @@ module Prism
# This represents an integer field.
class UInt8Field < Field
+ def c_param
+ "uint8_t #{name}"
+ end
+
def rbs_class
"Integer"
end
- def rbi_class
+ def call_seq_type
"Integer"
end
@@ -353,11 +401,15 @@ module Prism
# This represents an integer field.
class UInt32Field < Field
+ def c_param
+ "uint32_t #{name}"
+ end
+
def rbs_class
"Integer"
end
- def rbi_class
+ def call_seq_type
"Integer"
end
@@ -369,11 +421,15 @@ module Prism
# This represents an arbitrarily-sized integer. When it gets to Ruby it will
# be an Integer.
class IntegerField < Field
+ def c_param
+ "pm_integer_t #{name}"
+ end
+
def rbs_class
"Integer"
end
- def rbi_class
+ def call_seq_type
"Integer"
end
@@ -385,11 +441,15 @@ module Prism
# This represents a double-precision floating point number. When it gets to
# Ruby it will be a Float.
class DoubleField < Field
+ def c_param
+ "double #{name}"
+ end
+
def rbs_class
"Float"
end
- def rbi_class
+ def call_seq_type
"Float"
end
@@ -432,9 +492,6 @@ module Prism
when "pattern expression"
# the list of all possible types is too long with 37+ different classes
"Node"
- when Hash
- kind = kind.fetch("on error")
- REMOVE_ON_ERROR_TYPES ? nil : kind
else
kind
end
@@ -547,33 +604,17 @@ module Prism
extension = File.extname(filepath.gsub(".erb", ""))
heading =
- case extension
- when ".rb"
+ if extension == ".rb"
<<~HEADING
# frozen_string_literal: true
+ # :markup: markdown
=begin
+ --
This file is generated by the templates/template.rb script and should not be
modified manually. See #{filepath}
if you are looking to modify the template
- =end
-
- HEADING
- when ".rbs"
- <<~HEADING
- # This file is generated by the templates/template.rb script and should not be
- # modified manually. See #{filepath}
- # if you are looking to modify the template
-
- HEADING
- when ".rbi"
- <<~HEADING
- # typed: strict
-
- =begin
- This file is generated by the templates/template.rb script and should not be
- modified manually. See #{filepath}
- if you are looking to modify the template
+ ++
=end
HEADING
@@ -582,7 +623,7 @@ module Prism
/*----------------------------------------------------------------------------*/
/* This file is generated by the templates/template.rb script and should not */
/* be modified manually. See */
- /* #{filepath + " " * (74 - filepath.size) } */
+ /* #{filepath.ljust(74)} */
/* if you are looking to modify the */
/* template */
/*----------------------------------------------------------------------------*/
@@ -602,8 +643,14 @@ module Prism
end
end
- FileUtils.mkdir_p(File.dirname(write_to))
- File.write(write_to, contents)
+ begin
+ FileUtils.mkdir_p(File.dirname(write_to))
+ File.write(write_to, contents)
+ rescue SystemCallError # EACCES, EPERM, EROFS, etc.
+ # Fall back to the current directory
+ FileUtils.mkdir_p(File.dirname(name))
+ File.write(name, contents)
+ end
end
private
@@ -639,13 +686,13 @@ module Prism
TEMPLATES = [
"ext/prism/api_node.c",
"include/prism/ast.h",
- "include/prism/diagnostic.h",
+ "include/prism/internal/diagnostic.h",
"javascript/src/deserialize.js",
"javascript/src/nodes.js",
"javascript/src/visitor.js",
- "java/org/prism/Loader.java",
- "java/org/prism/Nodes.java",
- "java/org/prism/AbstractNodeVisitor.java",
+ "java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java",
+ "java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java",
+ "java/api/src/main/java-templates/org/ruby_lang/prism/AbstractNodeVisitor.java",
"lib/prism/compiler.rb",
"lib/prism/dispatcher.rb",
"lib/prism/dot_visitor.rb",
@@ -657,19 +704,11 @@ module Prism
"lib/prism/serialize.rb",
"lib/prism/visitor.rb",
"src/diagnostic.c",
+ "src/json.c",
"src/node.c",
"src/prettyprint.c",
"src/serialize.c",
- "src/token_type.c",
- "rbi/prism/dsl.rbi",
- "rbi/prism/node.rbi",
- "rbi/prism/visitor.rbi",
- "sig/prism.rbs",
- "sig/prism/dsl.rbs",
- "sig/prism/mutation_compiler.rbs",
- "sig/prism/node.rbs",
- "sig/prism/visitor.rbs",
- "sig/prism/_private/dot_visitor.rbs"
+ "src/tokens.c"
]
end
end
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
deleted file mode 100644
index f3c20ab2a5..0000000000
--- a/prism/util/pm_buffer.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/**
- * @file pm_buffer.h
- *
- * A wrapper around a contiguous block of allocated memory.
- */
-#ifndef PRISM_BUFFER_H
-#define PRISM_BUFFER_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_char.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * A pm_buffer_t is a simple memory buffer that stores data in a contiguous
- * block of memory.
- */
-typedef struct {
- /** The length of the buffer in bytes. */
- size_t length;
-
- /** The capacity of the buffer in bytes that has been allocated. */
- size_t capacity;
-
- /** A pointer to the start of the buffer. */
- char *value;
-} pm_buffer_t;
-
-/**
- * Return the size of the pm_buffer_t struct.
- *
- * @returns The size of the pm_buffer_t struct.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_sizeof(void);
-
-/**
- * Initialize a pm_buffer_t with the given capacity.
- *
- * @param buffer The buffer to initialize.
- * @param capacity The capacity of the buffer.
- * @returns True if the buffer was initialized successfully, false otherwise.
- */
-bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity);
-
-/**
- * Initialize a pm_buffer_t with its default values.
- *
- * @param buffer The buffer to initialize.
- * @returns True if the buffer was initialized successfully, false otherwise.
- */
-PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer);
-
-/**
- * Return the value of the buffer.
- *
- * @param buffer The buffer to get the value of.
- * @returns The value of the buffer.
- */
-PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer);
-
-/**
- * Return the length of the buffer.
- *
- * @param buffer The buffer to get the length of.
- * @returns The length of the buffer.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer);
-
-/**
- * Append the given amount of space as zeroes to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param length The amount of space to append and zero.
- */
-void pm_buffer_append_zeroes(pm_buffer_t *buffer, size_t length);
-
-/**
- * Append a formatted string to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param format The format string to append.
- * @param ... The arguments to the format string.
- */
-void pm_buffer_append_format(pm_buffer_t *buffer, const char *format, ...) PRISM_ATTRIBUTE_FORMAT(2, 3);
-
-/**
- * Append a string to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The string to append.
- * @param length The length of the string to append.
- */
-void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length);
-
-/**
- * Append a list of bytes to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The bytes to append.
- * @param length The length of the bytes to append.
- */
-void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length);
-
-/**
- * Append a single byte to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The byte to append.
- */
-void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
-
-/**
- * Append a 32-bit unsigned integer to the buffer as a variable-length integer.
- *
- * @param buffer The buffer to append to.
- * @param value The integer to append.
- */
-void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
-
-/**
- * Append a 32-bit signed integer to the buffer as a variable-length integer.
- *
- * @param buffer The buffer to append to.
- * @param value The integer to append.
- */
-void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
-
-/**
- * Append a double to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The double to append.
- */
-void pm_buffer_append_double(pm_buffer_t *buffer, double value);
-
-/**
- * Append a unicode codepoint to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param value The character to append.
- * @returns True if the codepoint was valid and appended successfully, false
- * otherwise.
- */
-bool pm_buffer_append_unicode_codepoint(pm_buffer_t *buffer, uint32_t value);
-
-/**
- * The different types of escaping that can be performed by the buffer when
- * appending a slice of Ruby source code.
- */
-typedef enum {
- PM_BUFFER_ESCAPING_RUBY,
- PM_BUFFER_ESCAPING_JSON
-} pm_buffer_escaping_t;
-
-/**
- * Append a slice of source code to the buffer.
- *
- * @param buffer The buffer to append to.
- * @param source The source code to append.
- * @param length The length of the source code to append.
- * @param escaping The type of escaping to perform.
- */
-void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
-
-/**
- * Prepend the given string to the buffer.
- *
- * @param buffer The buffer to prepend to.
- * @param value The string to prepend.
- * @param length The length of the string to prepend.
- */
-void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
-
-/**
- * Concatenate one buffer onto another.
- *
- * @param destination The buffer to concatenate onto.
- * @param source The buffer to concatenate.
- */
-void pm_buffer_concat(pm_buffer_t *destination, const pm_buffer_t *source);
-
-/**
- * Clear the buffer by reducing its size to 0. This does not free the allocated
- * memory, but it does allow the buffer to be reused.
- *
- * @param buffer The buffer to clear.
- */
-void pm_buffer_clear(pm_buffer_t *buffer);
-
-/**
- * Strip the whitespace from the end of the buffer.
- *
- * @param buffer The buffer to strip.
- */
-void pm_buffer_rstrip(pm_buffer_t *buffer);
-
-/**
- * Finds the first occurrence of the given value in the buffer.
- *
- * @param buffer The buffer to check.
- * @param value The value to check for.
- * @returns The index of the first occurrence of the value in the buffer, or
- * SIZE_MAX if the value is not found.
- */
-size_t pm_buffer_index(const pm_buffer_t *buffer, char value);
-
-/**
- * Insert the given string into the buffer at the given index.
- *
- * @param buffer The buffer to insert into.
- * @param index The index to insert at.
- * @param value The string to insert.
- * @param length The length of the string to insert.
- */
-void pm_buffer_insert(pm_buffer_t *buffer, size_t index, const char *value, size_t length);
-
-/**
- * Free the memory associated with the buffer.
- *
- * @param buffer The buffer to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer);
-
-#endif
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
deleted file mode 100644
index deeafd6321..0000000000
--- a/prism/util/pm_char.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/**
- * @file pm_char.h
- *
- * Functions for working with characters and strings.
- */
-#ifndef PRISM_CHAR_H
-#define PRISM_CHAR_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_newline_list.h"
-
-#include <stdbool.h>
-#include <stddef.h>
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are
- * whitespace.
- */
-size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are
- * whitespace while also tracking the location of each newline. Disallows
- * searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param newline_list The list of newlines to populate.
- * @return The number of characters at the start of the string that are
- * whitespace.
- */
-size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
-
-/**
- * Returns the number of characters at the start of the string that are inline
- * whitespace. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are inline
- * whitespace.
- */
-size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are decimal
- * digits.
- */
-size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits. Disallows searching past the given maximum number of
- * characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are
- * hexadecimal digits.
- */
-size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are octal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are octal
- * digits or underscores.
- */
-size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are decimal
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are decimal
- * digits or underscores.
- */
-size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are
- * hexadecimal digits or underscores. Disallows searching past the given maximum
- * number of characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are
- * hexadecimal digits or underscores.
- */
-size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns the number of characters at the start of the string that are regexp
- * options. Disallows searching past the given maximum number of characters.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @return The number of characters at the start of the string that are regexp
- * options.
- */
-size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
-
-/**
- * Returns the number of characters at the start of the string that are binary
- * digits or underscores. Disallows searching past the given maximum number of
- * characters.
- *
- * If multiple underscores are found in a row or if an underscore is
- * found at the end of the number, then the invalid pointer is set to the index
- * of the first invalid underscore.
- *
- * @param string The string to search.
- * @param length The maximum number of characters to search.
- * @param invalid The pointer to set to the index of the first invalid
- * underscore.
- * @return The number of characters at the start of the string that are binary
- * digits or underscores.
- */
-size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
-
-/**
- * Returns true if the given character is a whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is a whitespace character.
- */
-bool pm_char_is_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is an inline whitespace character.
- *
- * @param b The character to check.
- * @return True if the given character is an inline whitespace character.
- */
-bool pm_char_is_inline_whitespace(const uint8_t b);
-
-/**
- * Returns true if the given character is a binary digit.
- *
- * @param b The character to check.
- * @return True if the given character is a binary digit.
- */
-bool pm_char_is_binary_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is an octal digit.
- *
- * @param b The character to check.
- * @return True if the given character is an octal digit.
- */
-bool pm_char_is_octal_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is a decimal digit.
- *
- * @param b The character to check.
- * @return True if the given character is a decimal digit.
- */
-bool pm_char_is_decimal_digit(const uint8_t b);
-
-/**
- * Returns true if the given character is a hexadecimal digit.
- *
- * @param b The character to check.
- * @return True if the given character is a hexadecimal digit.
- */
-bool pm_char_is_hexadecimal_digit(const uint8_t b);
-
-#endif
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
deleted file mode 100644
index 6df23f8f50..0000000000
--- a/prism/util/pm_constant_pool.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/**
- * @file pm_constant_pool.h
- *
- * A data structure that stores a set of strings.
- *
- * Each string is assigned a unique id, which can be used to compare strings for
- * equality. This comparison ends up being much faster than strcmp, since it
- * only requires a single integer comparison.
- */
-#ifndef PRISM_CONSTANT_POOL_H
-#define PRISM_CONSTANT_POOL_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-/**
- * When we allocate constants into the pool, we reserve 0 to mean that the slot
- * is not yet filled. This constant is reused in other places to indicate the
- * lack of a constant id.
- */
-#define PM_CONSTANT_ID_UNSET 0
-
-/**
- * A constant id is a unique identifier for a constant in the constant pool.
- */
-typedef uint32_t pm_constant_id_t;
-
-/**
- * A list of constant IDs. Usually used to represent a set of locals.
- */
-typedef struct {
- /** The number of constant ids in the list. */
- size_t size;
-
- /** The number of constant ids that have been allocated in the list. */
- size_t capacity;
-
- /** The constant ids in the list. */
- pm_constant_id_t *ids;
-} pm_constant_id_list_t;
-
-/**
- * Initialize a list of constant ids.
- *
- * @param list The list to initialize.
- */
-void pm_constant_id_list_init(pm_constant_id_list_t *list);
-
-/**
- * Initialize a list of constant ids with a given capacity.
- *
- * @param list The list to initialize.
- * @param capacity The initial capacity of the list.
- */
-void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
-
-/**
- * Append a constant id to a list of constant ids. Returns false if any
- * potential reallocations fail.
- *
- * @param list The list to append to.
- * @param id The id to append.
- * @return Whether the append succeeded.
- */
-bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
-
-/**
- * Insert a constant id into a list of constant ids at the specified index.
- *
- * @param list The list to insert into.
- * @param index The index at which to insert.
- * @param id The id to insert.
- */
-void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
-
-/**
- * Checks if the current constant id list includes the given constant id.
- *
- * @param list The list to check.
- * @param id The id to check for.
- * @return Whether the list includes the given id.
- */
-bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id);
-
-/**
- * Free the memory associated with a list of constant ids.
- *
- * @param list The list to free.
- */
-void pm_constant_id_list_free(pm_constant_id_list_t *list);
-
-/**
- * The type of bucket in the constant pool hash map. This determines how the
- * bucket should be freed.
- */
-typedef unsigned int pm_constant_pool_bucket_type_t;
-
-/** By default, each constant is a slice of the source. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_DEFAULT = 0;
-
-/** An owned constant is one for which memory has been allocated. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_OWNED = 1;
-
-/** A constant constant is known at compile time. */
-static const pm_constant_pool_bucket_type_t PM_CONSTANT_POOL_BUCKET_CONSTANT = 2;
-
-/** A bucket in the hash map. */
-typedef struct {
- /** The incremental ID used for indexing back into the pool. */
- unsigned int id: 30;
-
- /** The type of the bucket, which determines how to free it. */
- pm_constant_pool_bucket_type_t type: 2;
-
- /** The hash of the bucket. */
- uint32_t hash;
-} pm_constant_pool_bucket_t;
-
-/** A constant in the pool which effectively stores a string. */
-typedef struct {
- /** A pointer to the start of the string. */
- const uint8_t *start;
-
- /** The length of the string. */
- size_t length;
-} pm_constant_t;
-
-/** The overall constant pool, which stores constants found while parsing. */
-typedef struct {
- /** The buckets in the hash map. */
- pm_constant_pool_bucket_t *buckets;
-
- /** The constants that are stored in the buckets. */
- pm_constant_t *constants;
-
- /** The number of buckets in the hash map. */
- uint32_t size;
-
- /** The number of buckets that have been allocated in the hash map. */
- uint32_t capacity;
-} pm_constant_pool_t;
-
-/**
- * Initialize a new constant pool with a given capacity.
- *
- * @param pool The pool to initialize.
- * @param capacity The initial capacity of the pool.
- * @return Whether the initialization succeeded.
- */
-bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
-
-/**
- * Return a pointer to the constant indicated by the given constant id.
- *
- * @param pool The pool to get the constant from.
- * @param constant_id The id of the constant to get.
- * @return A pointer to the constant.
- */
-pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
-
-/**
- * Find a constant in a constant pool. Returns the id of the constant, or 0 if
- * the constant is not found.
- *
- * @param pool The pool to find the constant in.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool that is a slice of a source string.
- * Returns the id of the constant, or 0 if any potential calls to resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool from memory that is now owned by the
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
- * resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length);
-
-/**
- * Insert a constant into a constant pool from memory that is constant. Returns
- * the id of the constant, or 0 if any potential calls to resize fail.
- *
- * @param pool The pool to insert the constant into.
- * @param start A pointer to the start of the constant.
- * @param length The length of the constant.
- * @return The id of the constant.
- */
-pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
-
-/**
- * Free the memory associated with a constant pool.
- *
- * @param pool The pool to free.
- */
-void pm_constant_pool_free(pm_constant_pool_t *pool);
-
-#endif
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
deleted file mode 100644
index a9e2966703..0000000000
--- a/prism/util/pm_integer.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/**
- * @file pm_integer.h
- *
- * This module provides functions for working with arbitrary-sized integers.
- */
-#ifndef PRISM_NUMBER_H
-#define PRISM_NUMBER_H
-
-#include "prism/defines.h"
-#include "prism/util/pm_buffer.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-/**
- * A structure that represents an arbitrary-sized integer.
- */
-typedef struct {
- /**
- * The number of allocated values. length is set to 0 if the integer fits
- * into uint32_t.
- */
- size_t length;
-
- /**
- * List of 32-bit integers. Set to NULL if the integer fits into uint32_t.
- */
- uint32_t *values;
-
- /**
- * Embedded value for small integer. This value is set to 0 if the value
- * does not fit into uint32_t.
- */
- uint32_t value;
-
- /**
- * Whether or not the integer is negative. It is stored this way so that a
- * zeroed pm_integer_t is always positive zero.
- */
- bool negative;
-} pm_integer_t;
-
-/**
- * An enum controlling the base of an integer. It is expected that the base is
- * already known before parsing the integer, even though it could be derived
- * from the string itself.
- */
-typedef enum {
- /** The default decimal base, with no prefix. Leading 0s will be ignored. */
- PM_INTEGER_BASE_DEFAULT,
-
- /** The binary base, indicated by a 0b or 0B prefix. */
- PM_INTEGER_BASE_BINARY,
-
- /** The octal base, indicated by a 0, 0o, or 0O prefix. */
- PM_INTEGER_BASE_OCTAL,
-
- /** The decimal base, indicated by a 0d, 0D, or empty prefix. */
- PM_INTEGER_BASE_DECIMAL,
-
- /** The hexadecimal base, indicated by a 0x or 0X prefix. */
- PM_INTEGER_BASE_HEXADECIMAL,
-
- /**
- * An unknown base, in which case pm_integer_parse will derive it based on
- * the content of the string. This is less efficient and does more
- * comparisons, so if callers know the base ahead of time, they should use
- * that instead.
- */
- PM_INTEGER_BASE_UNKNOWN
-} pm_integer_base_t;
-
-/**
- * Parse an integer from a string. This assumes that the format of the integer
- * has already been validated, as internal validation checks are not performed
- * here.
- *
- * @param integer The integer to parse into.
- * @param base The base of the integer.
- * @param start The start of the string.
- * @param end The end of the string.
- */
-void pm_integer_parse(pm_integer_t *integer, pm_integer_base_t base, const uint8_t *start, const uint8_t *end);
-
-/**
- * Compare two integers. This function returns -1 if the left integer is less
- * than the right integer, 0 if they are equal, and 1 if the left integer is
- * greater than the right integer.
- *
- * @param left The left integer to compare.
- * @param right The right integer to compare.
- * @return The result of the comparison.
- */
-int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
-
-/**
- * Reduce a ratio of integers to its simplest form.
- *
- * If either the numerator or denominator does not fit into a 32-bit integer, then
- * this function is a no-op. In the future, we may consider reducing even the
- * larger numbers, but for now we're going to keep it simple.
- *
- * @param numerator The numerator of the ratio.
- * @param denominator The denominator of the ratio.
- */
-void pm_integers_reduce(pm_integer_t *numerator, pm_integer_t *denominator);
-
-/**
- * Convert an integer to a decimal string.
- *
- * @param buffer The buffer to append the string to.
- * @param integer The integer to convert to a string.
- */
-PRISM_EXPORTED_FUNCTION void pm_integer_string(pm_buffer_t *buffer, const pm_integer_t *integer);
-
-/**
- * Free the internal memory of an integer. This memory will only be allocated if
- * the integer does not fit into a uint32_t.
- *
- * @param integer The integer to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_integer_free(pm_integer_t *integer);
-
-#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
deleted file mode 100644
index ad2294cd60..0000000000
--- a/prism/util/pm_list.c
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "prism/util/pm_list.h"
-
-/**
- * Returns true if the given list is empty.
- */
-PRISM_EXPORTED_FUNCTION bool
-pm_list_empty_p(pm_list_t *list) {
- return list->head == NULL;
-}
-
-/**
- * Returns the size of the list.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_list_size(pm_list_t *list) {
- return list->size;
-}
-
-/**
- * Append a node to the given list.
- */
-void
-pm_list_append(pm_list_t *list, pm_list_node_t *node) {
- if (list->head == NULL) {
- list->head = node;
- } else {
- list->tail->next = node;
- }
-
- list->tail = node;
- list->size++;
-}
-
-/**
- * Deallocate the internal state of the given list.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_list_free(pm_list_t *list) {
- pm_list_node_t *node = list->head;
- pm_list_node_t *next;
-
- while (node != NULL) {
- next = node->next;
- xfree(node);
- node = next;
- }
-
- list->size = 0;
-}
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
deleted file mode 100644
index e0671eaed3..0000000000
--- a/prism/util/pm_memchr.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * @file pm_memchr.h
- *
- * A custom memchr implementation.
- */
-#ifndef PRISM_MEMCHR_H
-#define PRISM_MEMCHR_H
-
-#include "prism/defines.h"
-#include "prism/encoding.h"
-
-#include <stddef.h>
-
-/**
- * We need to roll our own memchr to handle cases where the encoding changes and
- * we need to search for a character in a buffer that could be the trailing byte
- * of a multibyte character.
- *
- * @param source The source string.
- * @param character The character to search for.
- * @param number The maximum number of bytes to search.
- * @param encoding_changed Whether the encoding changed.
- * @param encoding A pointer to the encoding.
- * @return A pointer to the first occurrence of the character in the source
- * string, or NULL if no such character exists.
- */
-void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding);
-
-#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
deleted file mode 100644
index 8331618f54..0000000000
--- a/prism/util/pm_newline_list.c
+++ /dev/null
@@ -1,125 +0,0 @@
-#include "prism/util/pm_newline_list.h"
-
-/**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
- */
-bool
-pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
- list->offsets = (size_t *) xcalloc(capacity, sizeof(size_t));
- if (list->offsets == NULL) return false;
-
- list->start = start;
-
- // This is 1 instead of 0 because we want to include the first line of the
- // file as having offset 0, which is set because of calloc.
- list->size = 1;
- list->capacity = capacity;
-
- return true;
-}
-
-/**
- * Clear out the newlines that have been appended to the list.
- */
-void
-pm_newline_list_clear(pm_newline_list_t *list) {
- list->size = 1;
-}
-
-/**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
- */
-bool
-pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
- if (list->size == list->capacity) {
- size_t *original_offsets = list->offsets;
-
- list->capacity = (list->capacity * 3) / 2;
- list->offsets = (size_t *) xcalloc(list->capacity, sizeof(size_t));
- if (list->offsets == NULL) return false;
-
- memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
- xfree(original_offsets);
- }
-
- assert(*cursor == '\n');
- assert(cursor >= list->start);
- size_t newline_offset = (size_t) (cursor - list->start + 1);
-
- assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
- list->offsets[list->size++] = newline_offset;
-
- return true;
-}
-
-/**
- * Returns the line of the given offset. If the offset is not in the list, the
- * line of the closest offset less than the given offset is returned.
- */
-int32_t
-pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
- assert(cursor >= list->start);
- size_t offset = (size_t) (cursor - list->start);
-
- size_t left = 0;
- size_t right = list->size - 1;
-
- while (left <= right) {
- size_t mid = left + (right - left) / 2;
-
- if (list->offsets[mid] == offset) {
- return ((int32_t) mid) + start_line;
- }
-
- if (list->offsets[mid] < offset) {
- left = mid + 1;
- } else {
- right = mid - 1;
- }
- }
-
- return ((int32_t) left) + start_line - 1;
-}
-
-/**
- * Returns the line and column of the given offset. If the offset is not in the
- * list, the line and column of the closest offset less than the given offset
- * are returned.
- */
-pm_line_column_t
-pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line) {
- assert(cursor >= list->start);
- size_t offset = (size_t) (cursor - list->start);
-
- size_t left = 0;
- size_t right = list->size - 1;
-
- while (left <= right) {
- size_t mid = left + (right - left) / 2;
-
- if (list->offsets[mid] == offset) {
- return ((pm_line_column_t) { ((int32_t) mid) + start_line, 0 });
- }
-
- if (list->offsets[mid] < offset) {
- left = mid + 1;
- } else {
- right = mid - 1;
- }
- }
-
- return ((pm_line_column_t) {
- .line = ((int32_t) left) + start_line - 1,
- .column = (uint32_t) (offset - list->offsets[left - 1])
- });
-}
-
-/**
- * Free the internal memory allocated for the newline list.
- */
-void
-pm_newline_list_free(pm_newline_list_t *list) {
- xfree(list->offsets);
-}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
deleted file mode 100644
index 406abe8ba5..0000000000
--- a/prism/util/pm_newline_list.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/**
- * @file pm_newline_list.h
- *
- * A list of byte offsets of newlines in a string.
- *
- * When compiling the syntax tree, it's necessary to know the line and column
- * of many nodes. This is necessary to support things like error messages,
- * tracepoints, etc.
- *
- * It's possible that we could store the start line, start column, end line, and
- * end column on every node in addition to the offsets that we already store,
- * but that would be quite a lot of memory overhead.
- */
-#ifndef PRISM_NEWLINE_LIST_H
-#define PRISM_NEWLINE_LIST_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-
-/**
- * A list of offsets of newlines in a string. The offsets are assumed to be
- * sorted/inserted in ascending order.
- */
-typedef struct {
- /** A pointer to the start of the source string. */
- const uint8_t *start;
-
- /** The number of offsets in the list. */
- size_t size;
-
- /** The capacity of the list that has been allocated. */
- size_t capacity;
-
- /** The list of offsets. */
- size_t *offsets;
-} pm_newline_list_t;
-
-/**
- * A line and column in a string.
- */
-typedef struct {
- /** The line number. */
- int32_t line;
-
- /** The column number. */
- uint32_t column;
-} pm_line_column_t;
-
-/**
- * Initialize a new newline list with the given capacity. Returns true if the
- * allocation of the offsets succeeds, otherwise returns false.
- *
- * @param list The list to initialize.
- * @param start A pointer to the start of the source string.
- * @param capacity The initial capacity of the list.
- * @return True if the allocation of the offsets succeeds, otherwise false.
- */
-bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
-
-/**
- * Clear out the newlines that have been appended to the list.
- *
- * @param list The list to clear.
- */
-void
-pm_newline_list_clear(pm_newline_list_t *list);
-
-/**
- * Append a new offset to the newline list. Returns true if the reallocation of
- * the offsets succeeds (if one was necessary), otherwise returns false.
- *
- * @param list The list to append to.
- * @param cursor A pointer to the offset to append.
- * @return True if the reallocation of the offsets succeeds (if one was
- * necessary), otherwise false.
- */
-bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
-
-/**
- * Returns the line of the given offset. If the offset is not in the list, the
- * line of the closest offset less than the given offset is returned.
- *
- * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
- * @param start_line The line to start counting from.
- * @return The line of the given offset.
- */
-int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
-
-/**
- * Returns the line and column of the given offset. If the offset is not in the
- * list, the line and column of the closest offset less than the given offset
- * are returned.
- *
- * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
- * @param start_line The line to start counting from.
- * @return The line and column of the given offset.
- */
-pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
-
-/**
- * Free the internal memory allocated for the newline list.
- *
- * @param list The list to free.
- */
-void pm_newline_list_free(pm_newline_list_t *list);
-
-#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
deleted file mode 100644
index 75422fbdf2..0000000000
--- a/prism/util/pm_string.c
+++ /dev/null
@@ -1,383 +0,0 @@
-#include "prism/util/pm_string.h"
-
-/**
- * Returns the size of the pm_string_t struct. This is necessary to allocate the
- * correct amount of memory in the FFI backend.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_sizeof(void) {
- return sizeof(pm_string_t);
-}
-
-/**
- * Initialize a shared string that is based on initial input.
- */
-void
-pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end) {
- assert(start <= end);
-
- *string = (pm_string_t) {
- .type = PM_STRING_SHARED,
- .source = start,
- .length = (size_t) (end - start)
- };
-}
-
-/**
- * Initialize an owned string that is responsible for freeing allocated memory.
- */
-void
-pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length) {
- *string = (pm_string_t) {
- .type = PM_STRING_OWNED,
- .source = source,
- .length = length
- };
-}
-
-/**
- * Initialize a constant string that doesn't own its memory source.
- */
-void
-pm_string_constant_init(pm_string_t *string, const char *source, size_t length) {
- *string = (pm_string_t) {
- .type = PM_STRING_CONSTANT,
- .source = (const uint8_t *) source,
- .length = length
- };
-}
-
-#ifdef _WIN32
-/**
- * Represents a file handle on Windows, where the path will need to be freed
- * when the file is closed.
- */
-typedef struct {
- /** The path to the file, which will become allocated memory. */
- WCHAR *path;
-
- /** The handle to the file, which will start as uninitialized memory. */
- HANDLE file;
-} pm_string_file_handle_t;
-
-/**
- * Open the file indicated by the filepath parameter for reading on Windows.
- * Perform any kind of normalization that needs to happen on the filepath.
- */
-static pm_string_init_result_t
-pm_string_file_handle_open(pm_string_file_handle_t *handle, const char *filepath) {
- int length = MultiByteToWideChar(CP_UTF8, 0, filepath, -1, NULL, 0);
- if (length == 0) return PM_STRING_INIT_ERROR_GENERIC;
-
- handle->path = xmalloc(sizeof(WCHAR) * ((size_t) length));
- if ((handle->path == NULL) || (MultiByteToWideChar(CP_UTF8, 0, filepath, -1, handle->path, length) == 0)) {
- xfree(handle->path);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- handle->file = CreateFileW(handle->path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
- if (handle->file == INVALID_HANDLE_VALUE) {
- pm_string_init_result_t result = PM_STRING_INIT_ERROR_GENERIC;
-
- if (GetLastError() == ERROR_ACCESS_DENIED) {
- DWORD attributes = GetFileAttributesW(handle->path);
- if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
- result = PM_STRING_INIT_ERROR_DIRECTORY;
- }
- }
-
- xfree(handle->path);
- return result;
- }
-
- return PM_STRING_INIT_SUCCESS;
-}
-
-/**
- * Close the file handle and free the path.
- */
-static void
-pm_string_file_handle_close(pm_string_file_handle_t *handle) {
- xfree(handle->path);
- CloseHandle(handle->file);
-}
-#endif
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * We want to use demand paging as much as possible in order to avoid having to
- * read the entire file into memory (which could be detrimental to performance
- * for large files). This means that if we're on windows we'll use
- * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
- * `mmap`, and on other POSIX systems we'll use `read`.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t
-pm_string_mapped_init(pm_string_t *string, const char *filepath) {
-#ifdef _WIN32
- // Open the file for reading.
- pm_string_file_handle_t handle;
- pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
- if (result != PM_STRING_INIT_SUCCESS) return result;
-
- // Get the file size.
- DWORD file_size = GetFileSize(handle.file, NULL);
- if (file_size == INVALID_FILE_SIZE) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // If the file is empty, then we don't need to do anything else, we'll set
- // the source to a constant empty string and return.
- if (file_size == 0) {
- pm_string_file_handle_close(&handle);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- // Create a mapping of the file.
- HANDLE mapping = CreateFileMapping(handle.file, NULL, PAGE_READONLY, 0, 0, NULL);
- if (mapping == NULL) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Map the file into memory.
- uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
- CloseHandle(mapping);
- pm_string_file_handle_close(&handle);
-
- if (source == NULL) {
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = (size_t) file_size };
- return PM_STRING_INIT_SUCCESS;
-#elif defined(_POSIX_MAPPED_FILES)
- // Open the file for reading
- int fd = open(filepath, O_RDONLY);
- if (fd == -1) {
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Stat the file to get the file size
- struct stat sb;
- if (fstat(fd, &sb) == -1) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Ensure it is a file and not a directory
- if (S_ISDIR(sb.st_mode)) {
- close(fd);
- return PM_STRING_INIT_ERROR_DIRECTORY;
- }
-
- // mmap the file descriptor to virtually get the contents
- size_t size = (size_t) sb.st_size;
- uint8_t *source = NULL;
-
- if (size == 0) {
- close(fd);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
- if (source == MAP_FAILED) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- close(fd);
- *string = (pm_string_t) { .type = PM_STRING_MAPPED, .source = source, .length = size };
- return PM_STRING_INIT_SUCCESS;
-#else
- return pm_string_file_init(string, filepath);
-#endif
-}
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t
-pm_string_file_init(pm_string_t *string, const char *filepath) {
-#ifdef _WIN32
- // Open the file for reading.
- pm_string_file_handle_t handle;
- pm_string_init_result_t result = pm_string_file_handle_open(&handle, filepath);
- if (result != PM_STRING_INIT_SUCCESS) return result;
-
- // Get the file size.
- DWORD file_size = GetFileSize(handle.file, NULL);
- if (file_size == INVALID_FILE_SIZE) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // If the file is empty, then we don't need to do anything else, we'll set
- // the source to a constant empty string and return.
- if (file_size == 0) {
- pm_string_file_handle_close(&handle);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- // Create a buffer to read the file into.
- uint8_t *source = xmalloc(file_size);
- if (source == NULL) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Read the contents of the file
- DWORD bytes_read;
- if (!ReadFile(handle.file, source, file_size, &bytes_read, NULL)) {
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Check the number of bytes read
- if (bytes_read != file_size) {
- xfree(source);
- pm_string_file_handle_close(&handle);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- pm_string_file_handle_close(&handle);
- *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
- return PM_STRING_INIT_SUCCESS;
-#elif defined(PRISM_HAS_FILESYSTEM)
- // Open the file for reading
- int fd = open(filepath, O_RDONLY);
- if (fd == -1) {
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Stat the file to get the file size
- struct stat sb;
- if (fstat(fd, &sb) == -1) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- // Ensure it is a file and not a directory
- if (S_ISDIR(sb.st_mode)) {
- close(fd);
- return PM_STRING_INIT_ERROR_DIRECTORY;
- }
-
- // Check the size to see if it's empty
- size_t size = (size_t) sb.st_size;
- if (size == 0) {
- close(fd);
- const uint8_t source[] = "";
- *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
- return PM_STRING_INIT_SUCCESS;
- }
-
- size_t length = (size_t) size;
- uint8_t *source = xmalloc(length);
- if (source == NULL) {
- close(fd);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- long bytes_read = (long) read(fd, source, length);
- close(fd);
-
- if (bytes_read == -1) {
- xfree(source);
- return PM_STRING_INIT_ERROR_GENERIC;
- }
-
- *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
- return PM_STRING_INIT_SUCCESS;
-#else
- (void) string;
- (void) filepath;
- perror("pm_string_file_init is not implemented for this platform");
- return PM_STRING_INIT_ERROR_GENERIC;
-#endif
-}
-
-/**
- * Ensure the string is owned. If it is not, then reinitialize it as owned and
- * copy over the previous source.
- */
-void
-pm_string_ensure_owned(pm_string_t *string) {
- if (string->type == PM_STRING_OWNED) return;
-
- size_t length = pm_string_length(string);
- const uint8_t *source = pm_string_source(string);
-
- uint8_t *memory = xmalloc(length);
- if (!memory) return;
-
- pm_string_owned_init(string, memory, length);
- memcpy((void *) string->source, source, length);
-}
-
-/**
- * Compare the underlying lengths and bytes of two strings. Returns 0 if the
- * strings are equal, a negative number if the left string is less than the
- * right string, and a positive number if the left string is greater than the
- * right string.
- */
-int
-pm_string_compare(const pm_string_t *left, const pm_string_t *right) {
- size_t left_length = pm_string_length(left);
- size_t right_length = pm_string_length(right);
-
- if (left_length < right_length) {
- return -1;
- } else if (left_length > right_length) {
- return 1;
- }
-
- return memcmp(pm_string_source(left), pm_string_source(right), left_length);
-}
-
-/**
- * Returns the length associated with the string.
- */
-PRISM_EXPORTED_FUNCTION size_t
-pm_string_length(const pm_string_t *string) {
- return string->length;
-}
-
-/**
- * Returns the start pointer associated with the string.
- */
-PRISM_EXPORTED_FUNCTION const uint8_t *
-pm_string_source(const pm_string_t *string) {
- return string->source;
-}
-
-/**
- * Free the associated memory of the given string.
- */
-PRISM_EXPORTED_FUNCTION void
-pm_string_free(pm_string_t *string) {
- void *memory = (void *) string->source;
-
- if (string->type == PM_STRING_OWNED) {
- xfree(memory);
-#ifdef PRISM_HAS_MMAP
- } else if (string->type == PM_STRING_MAPPED && string->length) {
-#if defined(_WIN32)
- UnmapViewOfFile(memory);
-#elif defined(_POSIX_MAPPED_FILES)
- munmap(memory, string->length);
-#endif
-#endif /* PRISM_HAS_MMAP */
- }
-}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
deleted file mode 100644
index f99f1abdf3..0000000000
--- a/prism/util/pm_string.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/**
- * @file pm_string.h
- *
- * A generic string type that can have various ownership semantics.
- */
-#ifndef PRISM_STRING_H
-#define PRISM_STRING_H
-
-#include "prism/defines.h"
-
-#include <assert.h>
-#include <errno.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-
-// The following headers are necessary to read files using demand paging.
-#ifdef _WIN32
-#include <windows.h>
-#elif defined(_POSIX_MAPPED_FILES)
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#elif defined(PRISM_HAS_FILESYSTEM)
-#include <fcntl.h>
-#include <sys/stat.h>
-#endif
-
-/**
- * A generic string type that can have various ownership semantics.
- */
-typedef struct {
- /** A pointer to the start of the string. */
- const uint8_t *source;
-
- /** The length of the string in bytes of memory. */
- size_t length;
-
- /** The type of the string. This field determines how the string should be freed. */
- enum {
- /** This string is a constant string, and should not be freed. */
- PM_STRING_CONSTANT,
-
- /** This is a slice of another string, and should not be freed. */
- PM_STRING_SHARED,
-
- /** This string owns its memory, and should be freed using `pm_string_free`. */
- PM_STRING_OWNED,
-
-#ifdef PRISM_HAS_MMAP
- /** This string is a memory-mapped file, and should be freed using `pm_string_free`. */
- PM_STRING_MAPPED
-#endif
- } type;
-} pm_string_t;
-
-/**
- * Returns the size of the pm_string_t struct. This is necessary to allocate the
- * correct amount of memory in the FFI backend.
- *
- * @return The size of the pm_string_t struct.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_string_sizeof(void);
-
-/**
- * Defines an empty string. This is useful for initializing a string that will
- * be filled in later.
- */
-#define PM_STRING_EMPTY ((pm_string_t) { .type = PM_STRING_CONSTANT, .source = NULL, .length = 0 })
-
-/**
- * Initialize a shared string that is based on initial input.
- *
- * @param string The string to initialize.
- * @param start The start of the string.
- * @param end The end of the string.
- */
-void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end);
-
-/**
- * Initialize an owned string that is responsible for freeing allocated memory.
- *
- * @param string The string to initialize.
- * @param source The source of the string.
- * @param length The length of the string.
- */
-void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length);
-
-/**
- * Initialize a constant string that doesn't own its memory source.
- *
- * @param string The string to initialize.
- * @param source The source of the string.
- * @param length The length of the string.
- */
-void pm_string_constant_init(pm_string_t *string, const char *source, size_t length);
-
-/**
- * Represents the result of calling pm_string_mapped_init or
- * pm_string_file_init. We need this additional information because there is
- * not a platform-agnostic way to indicate that the file that was attempted to
- * be opened was a directory.
- */
-typedef enum {
- /** Indicates that the string was successfully initialized. */
- PM_STRING_INIT_SUCCESS = 0,
- /**
- * Indicates a generic error from a string_*_init function, where the type
- * of error should be read from `errno` or `GetLastError()`.
- */
- PM_STRING_INIT_ERROR_GENERIC = 1,
- /**
- * Indicates that the file that was attempted to be opened was a directory.
- */
- PM_STRING_INIT_ERROR_DIRECTORY = 2
-} pm_string_init_result_t;
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * We want to use demand paging as much as possible in order to avoid having to
- * read the entire file into memory (which could be detrimental to performance
- * for large files). This means that if we're on windows we'll use
- * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
- * `mmap`, and on other POSIX systems we'll use `read`.
- *
- * @param string The string to initialize.
- * @param filepath The filepath to read.
- * @return The success of the read, indicated by the value of the enum.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_mapped_init(pm_string_t *string, const char *filepath);
-
-/**
- * Read the file indicated by the filepath parameter into source and load its
- * contents and size into the given `pm_string_t`. The given `pm_string_t`
- * should be freed using `pm_string_free` when it is no longer used.
- *
- * @param string The string to initialize.
- * @param filepath The filepath to read.
- * @return The success of the read, indicated by the value of the enum.
- */
-PRISM_EXPORTED_FUNCTION pm_string_init_result_t pm_string_file_init(pm_string_t *string, const char *filepath);
-
-/**
- * Ensure the string is owned. If it is not, then reinitialize it as owned and
- * copy over the previous source.
- *
- * @param string The string to ensure is owned.
- */
-void pm_string_ensure_owned(pm_string_t *string);
-
-/**
- * Compare the underlying lengths and bytes of two strings. Returns 0 if the
- * strings are equal, a negative number if the left string is less than the
- * right string, and a positive number if the left string is greater than the
- * right string.
- *
- * @param left The left string to compare.
- * @param right The right string to compare.
- * @return The comparison result.
- */
-int pm_string_compare(const pm_string_t *left, const pm_string_t *right);
-
-/**
- * Returns the length associated with the string.
- *
- * @param string The string to get the length of.
- * @return The length of the string.
- */
-PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string);
-
-/**
- * Returns the start pointer associated with the string.
- *
- * @param string The string to get the start pointer of.
- * @return The start pointer of the string.
- */
-PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string);
-
-/**
- * Free the associated memory of the given string.
- *
- * @param string The string to free.
- */
-PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string);
-
-#endif
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
deleted file mode 100644
index 916a4cc3fd..0000000000
--- a/prism/util/pm_strpbrk.c
+++ /dev/null
@@ -1,206 +0,0 @@
-#include "prism/util/pm_strpbrk.h"
-
-/**
- * Add an invalid multibyte character error to the parser.
- */
-static inline void
-pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
-}
-
-/**
- * Set the explicit encoding for the parser to the current encoding.
- */
-static inline void
-pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
- if (parser->explicit_encoding != NULL) {
- if (parser->explicit_encoding == parser->encoding) {
- // Okay, we already locked to this encoding.
- } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
- // Not okay, we already found a Unicode escape sequence and this
- // conflicts.
- pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
- } else {
- // Should not be anything else.
- assert(false && "unreachable");
- }
- }
-
- parser->explicit_encoding = parser->encoding;
-}
-
-/**
- * This is the default path.
- */
-static inline const uint8_t *
-pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (source[index] < 0x80) {
- index++;
- } else {
- size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
-
- if (width > 0) {
- index += width;
- } else if (!validate) {
- index++;
- } else {
- // At this point we know we have an invalid multibyte character.
- // We'll walk forward as far as we can until we find the next
- // valid character so that we don't spam the user with a ton of
- // the same kind of error.
- const size_t start = index;
-
- do {
- index++;
- } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
- pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
- }
- }
- }
-
- return NULL;
-}
-
-/**
- * This is the path when the encoding is ASCII-8BIT.
- */
-static inline const uint8_t *
-pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
- index++;
- }
-
- return NULL;
-}
-
-/**
- * This is the slow path that does care about the encoding.
- */
-static inline const uint8_t *
-pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
- const pm_encoding_t *encoding = parser->encoding;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (source[index] < 0x80) {
- index++;
- } else {
- size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
- if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
-
- if (width > 0) {
- index += width;
- } else if (!validate) {
- index++;
- } else {
- // At this point we know we have an invalid multibyte character.
- // We'll walk forward as far as we can until we find the next
- // valid character so that we don't spam the user with a ton of
- // the same kind of error.
- const size_t start = index;
-
- do {
- index++;
- } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
- pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
- }
- }
- }
-
- return NULL;
-}
-
-/**
- * This is the fast path that does not care about the encoding because we know
- * the encoding only supports single-byte characters.
- */
-static inline const uint8_t *
-pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
- size_t index = 0;
- const pm_encoding_t *encoding = parser->encoding;
-
- while (index < maximum) {
- if (strchr((const char *) charset, source[index]) != NULL) {
- return source + index;
- }
-
- if (source[index] < 0x80 || !validate) {
- index++;
- } else {
- size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
- pm_strpbrk_explicit_encoding_set(parser, source, width);
-
- if (width > 0) {
- index += width;
- } else {
- // At this point we know we have an invalid multibyte character.
- // We'll walk forward as far as we can until we find the next
- // valid character so that we don't spam the user with a ton of
- // the same kind of error.
- const size_t start = index;
-
- do {
- index++;
- } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
-
- pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
- }
- }
- }
-
- return NULL;
-}
-
-/**
- * Here we have rolled our own version of strpbrk. The standard library strpbrk
- * has undefined behavior when the source string is not null-terminated. We want
- * to support strings that are not null-terminated because pm_parse does not
- * have the contract that the string is null-terminated. (This is desirable
- * because it means the extension can call pm_parse with the result of a call to
- * mmap).
- *
- * The standard library strpbrk also does not support passing a maximum length
- * to search. We want to support this for the reason mentioned above, but we
- * also don't want it to stop on null bytes. Ruby actually allows null bytes
- * within strings, comments, regular expressions, etc. So we need to be able to
- * skip past them.
- *
- * Finally, we want to support encodings wherein the charset could contain
- * characters that are trailing bytes of multi-byte characters. For example, in
- * Shift_JIS, the backslash character can be a trailing byte. In that case we
- * need to take a slower path and iterate one multi-byte character at a time.
- */
-const uint8_t *
-pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
- if (length <= 0) {
- return NULL;
- } else if (!parser->encoding_changed) {
- return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
- } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
- return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
- } else if (parser->encoding->multibyte) {
- return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
- } else {
- return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
- }
-}
diff --git a/prism/version.h b/prism/version.h
index 0a2a8c8fce..181b398462 100644
--- a/prism/version.h
+++ b/prism/version.h
@@ -6,6 +6,8 @@
#ifndef PRISM_VERSION_H
#define PRISM_VERSION_H
+#include "prism/compiler/exported.h"
+
/**
* The major version of the Prism library as an int.
*/
@@ -14,7 +16,7 @@
/**
* The minor version of the Prism library as an int.
*/
-#define PRISM_VERSION_MINOR 4
+#define PRISM_VERSION_MINOR 9
/**
* The patch version of the Prism library as an int.
@@ -24,6 +26,13 @@
/**
* The version of the Prism library as a constant string.
*/
-#define PRISM_VERSION "1.4.0"
+#define PRISM_VERSION "1.9.0"
+
+/**
+ * The prism version and the serialization format.
+ *
+ * @returns The prism version as a constant string.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_version(void);
#endif