summaryrefslogtreecommitdiff
path: root/prism/templates
diff options
context:
space:
mode:
Diffstat (limited to 'prism/templates')
-rw-r--r--prism/templates/ext/prism/api_node.c.erb296
-rw-r--r--prism/templates/include/prism/ast.h.erb278
-rw-r--r--prism/templates/include/prism/internal/diagnostic.h.erb60
-rw-r--r--prism/templates/lib/prism/compiler.rb.erb52
-rw-r--r--prism/templates/lib/prism/dispatcher.rb.erb111
-rw-r--r--prism/templates/lib/prism/dot_visitor.rb.erb215
-rw-r--r--prism/templates/lib/prism/dsl.rb.erb172
-rw-r--r--prism/templates/lib/prism/inspect_visitor.rb.erb147
-rw-r--r--prism/templates/lib/prism/mutation_compiler.rb.erb22
-rw-r--r--prism/templates/lib/prism/node.rb.erb748
-rw-r--r--prism/templates/lib/prism/reflection.rb.erb145
-rw-r--r--prism/templates/lib/prism/serialize.rb.erb702
-rw-r--r--prism/templates/lib/prism/visitor.rb.erb73
-rw-r--r--prism/templates/src/diagnostic.c.erb554
-rw-r--r--prism/templates/src/json.c.erb130
-rw-r--r--prism/templates/src/node.c.erb166
-rw-r--r--prism/templates/src/prettyprint.c.erb177
-rw-r--r--prism/templates/src/serialize.c.erb404
-rw-r--r--prism/templates/src/tokens.c.erb367
-rwxr-xr-xprism/templates/template.rb723
20 files changed, 5542 insertions, 0 deletions
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
new file mode 100644
index 0000000000..41d7165930
--- /dev/null
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -0,0 +1,296 @@
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+#include "prism/extension.h"
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+
+#include <assert.h>
+
+extern VALUE rb_cPrism;
+extern VALUE rb_cPrismNode;
+extern VALUE rb_cPrismSource;
+extern VALUE rb_cPrismToken;
+extern VALUE rb_cPrismLocation;
+
+<%- nodes.each do |node| -%>
+static VALUE rb_cPrism<%= node.name %>;
+<%- end -%>
+
+static VALUE
+pm_location_new(const uint32_t start, const uint32_t length, VALUE source, bool freeze) {
+ if (freeze) {
+ VALUE location_argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
+ return rb_obj_freeze(rb_class_new_instance(3, location_argv, rb_cPrismLocation));
+ } else {
+ uint64_t value = ((((uint64_t) start) << 32) | ((uint64_t) length));
+ return ULL2NUM(value);
+ }
+}
+
+VALUE
+pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
+ ID type = rb_intern(pm_token_type(token->type));
+ VALUE location = pm_location_new((uint32_t) (token->start - pm_parser_start(parser)), (uint32_t) (token->end - token->start), source, freeze);
+
+ VALUE slice = rb_enc_str_new((const char *) token->start, token->end - token->start, encoding);
+ if (freeze) rb_obj_freeze(slice);
+
+ VALUE argv[] = { source, ID2SYM(type), slice, location };
+ VALUE value = rb_class_new_instance(4, argv, rb_cPrismToken);
+ if (freeze) rb_obj_freeze(value);
+
+ return value;
+}
+
+static VALUE
+pm_string_new(const pm_string_t *string, rb_encoding *encoding) {
+ return rb_obj_freeze(rb_enc_str_new((const char *) pm_string_source(string), pm_string_length(string), encoding));
+}
+
+VALUE
+pm_integer_new(const pm_integer_t *integer) {
+ VALUE result;
+ if (integer->values == NULL) {
+ result = UINT2NUM(integer->value);
+ } else {
+ VALUE string = rb_str_new(NULL, integer->length * 8);
+ unsigned char *bytes = (unsigned char *) RSTRING_PTR(string);
+
+ size_t offset = integer->length * 8;
+ for (size_t value_index = 0; value_index < integer->length; value_index++) {
+ uint32_t value = integer->values[value_index];
+
+ for (int index = 0; index < 8; index++) {
+ int byte = (value >> (4 * index)) & 0xf;
+ bytes[--offset] = byte < 10 ? byte + '0' : byte - 10 + 'a';
+ }
+ }
+
+ result = rb_funcall(string, rb_intern("to_i"), 1, UINT2NUM(16));
+ }
+
+ if (integer->negative) {
+ result = rb_funcall(result, rb_intern("-@"), 0);
+ }
+
+ return result;
+}
+
+// Create a Prism::Source object from the given parser, after pm_parse() was called.
+VALUE
+pm_source_new(const pm_parser_t *parser, rb_encoding *encoding, bool freeze) {
+ const uint8_t *start = pm_parser_start(parser);
+ VALUE source_string = rb_enc_str_new((const char *) start, pm_parser_end(parser) - start, encoding);
+
+ const pm_line_offset_list_t *line_offsets = pm_parser_line_offsets(parser);
+ VALUE offsets;
+
+ if (freeze) {
+ offsets = rb_ary_new_capa(line_offsets->size);
+ for (size_t index = 0; index < line_offsets->size; index++) {
+ rb_ary_push(offsets, ULONG2NUM(line_offsets->offsets[index]));
+ }
+
+ rb_obj_freeze(source_string);
+ rb_obj_freeze(offsets);
+ } else {
+ offsets = rb_str_new((const char *) line_offsets->offsets, line_offsets->size * sizeof(uint32_t));
+ }
+
+ VALUE source = rb_funcall(rb_cPrismSource, rb_intern("for"), 3, source_string, LONG2NUM(pm_parser_start_line(parser)), offsets);
+ if (freeze) rb_obj_freeze(source);
+
+ return source;
+}
+
+typedef struct pm_node_stack_node {
+ struct pm_node_stack_node *prev;
+ const pm_node_t *visit;
+ bool visited;
+} pm_node_stack_node_t;
+
+static void
+pm_node_stack_push(pm_arena_t *arena, pm_node_stack_node_t **stack, const pm_node_t *visit) {
+ pm_node_stack_node_t *node = (pm_node_stack_node_t *) pm_arena_alloc(arena, sizeof(pm_node_stack_node_t), PRISM_ALIGNOF(pm_node_stack_node_t));
+ node->prev = *stack;
+ node->visit = visit;
+ node->visited = false;
+ *stack = node;
+}
+
+static const pm_node_t *
+pm_node_stack_pop(pm_node_stack_node_t **stack) {
+ pm_node_stack_node_t *current = *stack;
+ const pm_node_t *visit = current->visit;
+
+ *stack = current->prev;
+
+ return visit;
+}
+
+typedef struct {
+ VALUE constants;
+ rb_encoding *encoding;
+} pm_ast_constants_each_data_t;
+
+static void
+pm_ast_constants_each(const pm_constant_t *constant, void *data) {
+ pm_ast_constants_each_data_t *constants_data = (pm_ast_constants_each_data_t *) data;
+ int state = 0;
+
+ VALUE string = rb_enc_str_new((const char *) pm_constant_start(constant), pm_constant_length(constant), constants_data->encoding);
+ VALUE value = rb_protect(rb_str_intern, string, &state);
+
+ if (state != 0) {
+ value = ID2SYM(rb_intern_const("?"));
+ rb_set_errinfo(Qnil);
+ }
+
+ rb_ary_push(constants_data->constants, value);
+}
+
+VALUE
+pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encoding, VALUE source, bool freeze) {
+ VALUE constants = rb_ary_new_capa(pm_parser_constants_size(parser));
+ pm_ast_constants_each_data_t constants_data = { .constants = constants, .encoding = encoding };
+ pm_parser_constants_each(parser, pm_ast_constants_each, &constants_data);
+
+ pm_arena_t *node_arena = pm_arena_new();
+ pm_node_stack_node_t *node_stack = NULL;
+ pm_node_stack_push(node_arena, &node_stack, node);
+ VALUE value_stack = rb_ary_new();
+
+ while (node_stack != NULL) {
+ if (!node_stack->visited) {
+ if (node_stack->visit == NULL) {
+ pm_node_stack_pop(&node_stack);
+ rb_ary_push(value_stack, Qnil);
+ continue;
+ }
+
+ const pm_node_t *node = node_stack->visit;
+ node_stack->visited = true;
+
+ switch (PM_NODE_TYPE(node)) {
+ <%- nodes.each do |node| -%>
+ <%- if node.fields.any? { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ case <%= node.type %>: {
+ pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
+ pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>);
+ <%- when Prism::Template::NodeListField -%>
+ for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+ pm_node_stack_push(node_arena, &node_stack, (pm_node_t *) cast-><%= field.name %>.nodes[index]);
+ }
+ <%- end -%>
+ <%- end -%>
+ break;
+ }
+ <%- end -%>
+ <%- end -%>
+ default:
+ break;
+ }
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ } else {
+ const pm_node_t *node = pm_node_stack_pop(&node_stack);
+
+ switch (PM_NODE_TYPE(node)) {
+ <%- nodes.each do |node| -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ case <%= node.type %>: {
+ <%- if node.fields.any? { |field| ![Prism::Template::NodeField, Prism::Template::OptionalNodeField].include?(field.class) } -%>
+ pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+ <%- end -%>
+ VALUE argv[<%= node.fields.length + 4 %>];
+
+ // source
+ argv[0] = source;
+
+ // node_id
+ argv[1] = ULONG2NUM(node->node_id);
+
+ // location
+ argv[2] = pm_location_new(node->location.start, node->location.length, source, freeze);
+
+ // flags
+ argv[3] = ULONG2NUM(node->flags);
+ <%- node.fields.each.with_index(4) do |field, index| -%>
+
+ // <%= field.name %>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField, Prism::Template::OptionalNodeField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = rb_ary_pop(value_stack);
+ <%- when Prism::Template::NodeListField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
+ for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+ rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
+ }
+ if (freeze) rb_obj_freeze(argv[<%= index %>]);
+ <%- when Prism::Template::StringField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = pm_string_new(&cast-><%= field.name %>, encoding);
+ <%- when Prism::Template::ConstantField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ assert(cast-><%= field.name %> != 0);
+ argv[<%= index %>] = RARRAY_AREF(constants, cast-><%= field.name %> - 1);
+ <%- when Prism::Template::OptionalConstantField -%>
+ argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : RARRAY_AREF(constants, cast-><%= field.name %> - 1);
+ <%- when Prism::Template::ConstantListField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
+ for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+ assert(cast-><%= field.name %>.ids[index] != 0);
+ rb_ary_push(argv[<%= index %>], RARRAY_AREF(constants, cast-><%= field.name %>.ids[index] - 1));
+ }
+ if (freeze) rb_obj_freeze(argv[<%= index %>]);
+ <%- when Prism::Template::LocationField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
+ <%- when Prism::Template::OptionalLocationField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = cast-><%= field.name %>.length == 0 ? Qnil : pm_location_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
+ <%- when Prism::Template::UInt8Field -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = ULONG2NUM(cast-><%= field.name %>);
+ <%- when Prism::Template::IntegerField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = pm_integer_new(&cast-><%= field.name %>);
+ <%- when Prism::Template::DoubleField -%>
+#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
+ argv[<%= index %>] = DBL2NUM(cast-><%= field.name %>);
+ <%- else -%>
+ <%- raise -%>
+ <%- end -%>
+ <%- end -%>
+
+ VALUE value = rb_class_new_instance(<%= node.fields.length + 4 %>, argv, rb_cPrism<%= node.name %>);
+ if (freeze) rb_obj_freeze(value);
+
+ rb_ary_push(value_stack, value);
+ break;
+ }
+ <%- end -%>
+ default:
+ rb_raise(rb_eRuntimeError, "unknown node type: %d", PM_NODE_TYPE(node));
+ }
+ }
+ }
+
+ pm_arena_free(node_arena);
+ return rb_ary_pop(value_stack);
+}
+
+void
+Init_prism_api_node(void) {
+ <%- nodes.each do |node| -%>
+ rb_cPrism<%= node.name %> = rb_define_class_under(rb_cPrism, "<%= node.name %>", rb_cPrismNode);
+ <%- end -%>
+}
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
new file mode 100644
index 0000000000..3b3be25e76
--- /dev/null
+++ b/prism/templates/include/prism/ast.h.erb
@@ -0,0 +1,278 @@
+/**
+ * @file ast.h
+ *
+ * The abstract syntax tree.
+ *
+ * --
+ */
+#ifndef PRISM_AST_H
+#define PRISM_AST_H
+
+#include "prism/compiler/align.h"
+#include "prism/compiler/exported.h"
+
+#include "prism/arena.h"
+#include "prism/constant_pool.h"
+#include "prism/integer.h"
+#include "prism/stringy.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * This enum represents every type of token in the Ruby source.
+ */
+typedef enum pm_token_type {
+<%- tokens.each do |token| -%>
+ /** <%= Prism::Template::Doxygen.verbatim(token.comment) %> */
+ PM_TOKEN_<%= token.name %><%= " = #{token.value}" if token.value %>,
+
+<%- end -%>
+ /** The maximum token value. */
+ PM_TOKEN_MAXIMUM,
+} pm_token_type_t;
+
+/**
+ * This struct represents a token in the Ruby source. We use it to track both
+ * type and location information.
+ */
+typedef struct {
+ /** The type of the token. */
+ pm_token_type_t type;
+
+ /** A pointer to the start location of the token in the source. */
+ const uint8_t *start;
+
+ /** A pointer to the end location of the token in the source. */
+ const uint8_t *end;
+} pm_token_t;
+
+/**
+ * Returns a string representation of the given token type.
+ *
+ * @param token_type The type of the token to get the string representation of.
+ * @returns A string representation of the given token type. This is meant for
+ * debugging purposes and is not guaranteed to be stable across versions.
+ */
+PRISM_EXPORTED_FUNCTION const char * pm_token_type(pm_token_type_t token_type);
+
+/**
+ * This struct represents a slice in the source code, defined by an offset and
+ * a length. Note that we have confirmation that we can represent all locations
+ * within Ruby source files using 32-bit integers per:
+ *
+ * https://bugs.ruby-lang.org/issues/20488#note-1
+ *
+ */
+typedef struct {
+ /** The offset of the location from the start of the source. */
+ uint32_t start;
+
+ /** The length of the location. */
+ uint32_t length;
+} pm_location_t;
+
+struct pm_node;
+
+/**
+ * A list of nodes in the source, most often used for lists of children.
+ */
+typedef struct pm_node_list {
+ /** The number of nodes in the list. */
+ size_t size;
+
+ /** The capacity of the list that has been allocated. */
+ size_t capacity;
+
+ /** The nodes in the list. */
+ struct pm_node **nodes;
+} pm_node_list_t;
+
+/**
+ * This enum represents every type of node in the Ruby syntax tree.
+ */
+enum pm_node_type {
+<%- nodes.each_with_index do |node, index| -%>
+ /** <%= node.name %> */
+ <%= node.type %> = <%= index + 1 %>,
+
+<%- end -%>
+ /** A special kind of node used for compilation. */
+ PM_SCOPE_NODE
+};
+
+/**
+ * This is the type of node embedded in the node struct. We explicitly control
+ * the size of it here to avoid having the variable-width enum.
+ */
+typedef uint16_t pm_node_type_t;
+
+/**
+ * These are the flags embedded in the node struct. We explicitly control the
+ * size of it here to avoid having the variable-width enum.
+ */
+typedef uint16_t pm_node_flags_t;
+
+/**
+ * We store the flags enum in every node in the tree. Some flags are common to
+ * all nodes (the ones listed below). Others are specific to certain node types.
+ */
+static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = 0x1;
+static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = 0x2;
+
+/**
+ * This is the base structure that represents a node in the syntax tree. It is
+ * embedded into every node type.
+ */
+typedef struct pm_node {
+ /**
+ * This represents the type of the node. It somewhat maps to the nodes that
+ * existed in the original grammar and ripper, but it is not a 1:1 mapping.
+ */
+ pm_node_type_t type;
+
+ /**
+ * This represents any flags on the node. Some are common to all nodes, and
+ * some are specific to the type of node.
+ */
+ pm_node_flags_t flags;
+
+ /**
+ * The unique identifier for this node, which is deterministic based on the
+ * source. It is used to identify unique nodes across parses.
+ */
+ uint32_t node_id;
+
+ /**
+ * This is the location of the node in the source. It is a range of bytes
+ * containing a start and an end.
+ */
+ pm_location_t location;
+} pm_node_t;
+
+/**
+ * Cast the given node to the base pm_node_t type.
+ */
+#define PM_NODE_UPCAST(node_) ((pm_node_t *) (node_))
+
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node_) ((enum pm_node_type) (node_)->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
+#define PM_NODE_TYPE_P(node_, type_) (PM_NODE_TYPE(node_) == (type_))
+
+/**
+ * Return the flags associated with the given node.
+ */
+#define PM_NODE_FLAGS(node_) (PM_NODE_UPCAST(node_)->flags)
+
+/**
+ * Return true if the given flag is set on the given node.
+ */
+#define PM_NODE_FLAG_P(node_, flag_) ((PM_NODE_FLAGS(node_) & (flag_)) != 0)
+
+/**
+ * The alignment required for a child node within a parent node.
+ */
+#ifdef _MSC_VER
+#define PM_NODE_ALIGNAS __declspec(align(8))
+#else
+#define PM_NODE_ALIGNAS PRISM_ALIGNAS(PRISM_ALIGNOF(void *))
+#endif
+<%- nodes.each do |node| -%>
+
+/**
+ * <%= node.name %>
+ *
+<%- node.each_comment_line do |line| -%>
+ *<%= line %>
+<%- end -%>
+ *
+ * Type: ::<%= node.type %>
+<% if (node_flags = node.flags) %>
+ * Flags (#pm_<%= node_flags.human %>):
+<%- node_flags.values.each do |value| -%>
+ * * ::PM_<%= node_flags.human.upcase %>_<%= value.name %>
+<%- end -%>
+<%- end -%>
+ *
+ * @extends pm_node_t
+ */
+typedef struct pm_<%= node.human %> {
+ /** The embedded base node. */
+ pm_node_t base;
+<%- node.fields.each do |field| -%>
+
+ /**
+ * <%= node.name %>#<%= field.name %>
+ <%- if field.comment -%>
+ *
+ <%- field.each_comment_line do |line| -%>
+ *<%= line %>
+ <%- end -%>
+ <%- end -%>
+ */
+ <%= case field
+ when Prism::Template::NodeField, Prism::Template::OptionalNodeField then "PM_NODE_ALIGNAS struct #{field.c_type} *#{field.name}"
+ when Prism::Template::NodeListField then "struct pm_node_list #{field.name}"
+ when Prism::Template::ConstantField, Prism::Template::OptionalConstantField then "pm_constant_id_t #{field.name}"
+ when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}"
+ when Prism::Template::StringField then "pm_string_t #{field.name}"
+ when Prism::Template::LocationField, Prism::Template::OptionalLocationField then "pm_location_t #{field.name}"
+ when Prism::Template::UInt8Field then "uint8_t #{field.name}"
+ when Prism::Template::UInt32Field then "uint32_t #{field.name}"
+ when Prism::Template::IntegerField then "pm_integer_t #{field.name}"
+ when Prism::Template::DoubleField then "double #{field.name}"
+ else raise field.class.name
+ end
+ %>;
+<%- end -%>
+} pm_<%= node.human %>_t;
+<%- end -%>
+<%- flags.each do |flag| -%>
+
+/**
+ * <%= flag.comment %>
+ */
+typedef enum pm_<%= flag.human %> {
+ <%- flag.values.each_with_index do |value, index| -%>
+<%= "\n" if index > 0 -%>
+ /** <%= value.comment %> */
+ PM_<%= flag.human.upcase %>_<%= value.name %> = <%= 1 << (index + Prism::Template::COMMON_FLAGS_COUNT) %>,
+ <%- end -%>
+
+ PM_<%= flag.human.upcase %>_LAST,
+} pm_<%= flag.human %>_t;
+<%- end -%>
+<%- nodes.each do |node| -%>
+
+<%- params = node.fields.map(&:c_param) -%>
+/**
+ * Allocate and initialize a new <%= node.name %> node.
+ *
+ * @param arena The arena to allocate from.
+ * @param node_id The unique identifier for this node.
+ * @param flags The flags for this node.
+ * @param location The location of this node in the source.
+<%- node.fields.each do |field| -%>
+ * @param <%= field.name %> <%= field.comment ? Prism::Template::Doxygen.verbatim(field.comment.lines.first.strip) : "The #{field.name} field." %>
+<%- end -%>
+ * @returns The newly allocated and initialized node.
+ */
+PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>);
+<%- end -%>
+
+/**
+ * When we're serializing to Java, we want to skip serializing the location
+ * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us
+ * to specify that through the environment. It will never be true except for in
+ * those build systems.
+ */
+#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0 %>
+
+#endif
diff --git a/prism/templates/include/prism/internal/diagnostic.h.erb b/prism/templates/include/prism/internal/diagnostic.h.erb
new file mode 100644
index 0000000000..ee44ff5382
--- /dev/null
+++ b/prism/templates/include/prism/internal/diagnostic.h.erb
@@ -0,0 +1,60 @@
+#ifndef PRISM_INTERNAL_DIAGNOSTIC_H
+#define PRISM_INTERNAL_DIAGNOSTIC_H
+
+#include "prism/internal/list.h"
+
+#include "prism/arena.h"
+#include "prism/diagnostic.h"
+
+/*
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ */
+typedef enum {
+ /* These are the error diagnostics. */
+ <%- errors.each do |error| -%>
+ PM_ERR_<%= error.name %>,
+ <%- end -%>
+
+ /* These are the warning diagnostics. */
+ <%- warnings.each do |warning| -%>
+ PM_WARN_<%= warning.name %>,
+ <%- end -%>
+} pm_diagnostic_id_t;
+
+/*
+ * This struct represents a diagnostic generated during parsing.
+ */
+struct pm_diagnostic_t {
+ /* The embedded base node. */
+ pm_list_node_t node;
+
+ /* The location of the diagnostic in the source. */
+ pm_location_t location;
+
+ /* The ID of the diagnostic. */
+ pm_diagnostic_id_t diag_id;
+
+ /* The message associated with the diagnostic. */
+ const char *message;
+
+ /*
+ * The level of the diagnostic, see `pm_error_level_t` and
+ * `pm_warning_level_t` for possible values.
+ */
+ uint8_t level;
+};
+
+/*
+ * Append a diagnostic to the given list of diagnostics that is using shared
+ * memory for its message.
+ */
+void pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id);
+
+/*
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ */
+void pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...);
+
+#endif
diff --git a/prism/templates/lib/prism/compiler.rb.erb b/prism/templates/lib/prism/compiler.rb.erb
new file mode 100644
index 0000000000..13317cac04
--- /dev/null
+++ b/prism/templates/lib/prism/compiler.rb.erb
@@ -0,0 +1,52 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # A compiler is a visitor that returns the value of each node as it visits.
+ # This is as opposed to a visitor which will only walk the tree. This can be
+ # useful when you are trying to compile a tree into a different format.
+ #
+ # For example, to build a representation of the tree as s-expressions, you
+ # could write:
+ #
+ # class SExpressions < Prism::Compiler
+ # def visit_arguments_node(node) = [:arguments, super]
+ # def visit_call_node(node) = [:call, super]
+ # def visit_integer_node(node) = [:integer]
+ # def visit_program_node(node) = [:program, super]
+ # end
+ #
+ # Prism.parse("1 + 2").value.accept(SExpressions.new)
+ # # => [:program, [[[:call, [[:integer], [:arguments, [[:integer]]]]]]]]
+ #
+ class Compiler < Visitor
+ # Visit an individual node.
+ #--
+ #: (node?) -> untyped
+ def visit(node) # :nodoc:
+ node&.accept(self)
+ end
+
+ # Visit a list of nodes.
+ #--
+ #: (Array[node?]) -> untyped
+ def visit_all(nodes) # :nodoc:
+ nodes.map { |node| node&.accept(self) }
+ end
+
+ # Visit the child nodes of the given node.
+ #--
+ #: (node) -> Array[untyped]
+ def visit_child_nodes(node) # :nodoc:
+ node.each_child_node.map { |node| node.accept(self) }
+ end
+
+ <%- nodes.each_with_index do |node, index| -%>
+<%= "\n" if index != 0 -%>
+ #: (<%= node.name %>) -> Array[untyped]
+ def visit_<%= node.human %>(node) # :nodoc:
+ node.each_child_node.map { |node| node.accept(self) }
+ end
+ <%- end -%>
+ end
+end
diff --git a/prism/templates/lib/prism/dispatcher.rb.erb b/prism/templates/lib/prism/dispatcher.rb.erb
new file mode 100644
index 0000000000..5991b0c904
--- /dev/null
+++ b/prism/templates/lib/prism/dispatcher.rb.erb
@@ -0,0 +1,111 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # The dispatcher class fires events for nodes that are found while walking an
+ # AST to all registered listeners. It's useful for performing different types
+ # of analysis on the AST while only having to walk the tree once.
+ #
+ # To use the dispatcher, you would first instantiate it and register listeners
+ # for the events you're interested in:
+ #
+ # class OctalListener
+ # def on_integer_node_enter(node)
+ # if node.octal? && !node.slice.start_with?("0o")
+ # warn("Octal integers should be written with the 0o prefix")
+ # end
+ # end
+ # end
+ #
+ # listener = OctalListener.new
+ # dispatcher = Prism::Dispatcher.new
+ # dispatcher.register(listener, :on_integer_node_enter)
+ #
+ # Then, you can walk any number of trees and dispatch events to the listeners:
+ #
+ # result = Prism.parse("001 + 002 + 003")
+ # dispatcher.dispatch(result.value)
+ #
+ # Optionally, you can also use `#dispatch_once` to dispatch enter and leave
+ # events for a single node without recursing further down the tree. This can
+ # be useful in circumstances where you want to reuse the listeners you already
+ # have registers but want to stop walking the tree at a certain point.
+ #
+ # integer = result.value.statements.body.first.receiver.receiver
+ # dispatcher.dispatch_once(integer)
+ #
+ class Dispatcher < Visitor
+ # A hash mapping event names to arrays of listeners that should be notified
+ # when that event is fired.
+ attr_reader :listeners #: Hash[Symbol, Array[untyped]]
+
+ # Initialize a new dispatcher.
+ #--
+ #: () -> void
+ def initialize
+ @listeners = {}
+ end
+
+ # Register a listener for one or more events.
+ #--
+ #: (untyped, *Symbol) -> void
+ def register(listener, *events)
+ register_events(listener, events)
+ end
+
+ # Register all public methods of a listener that match the pattern
+ # `on_<node_name>_(enter|leave)`.
+ #--
+ #: (untyped) -> void
+ def register_public_methods(listener)
+ register_events(listener, listener.public_methods(false).grep(/\Aon_.+_(?:enter|leave)\z/))
+ end
+
+ # Register a listener for the given events.
+ #--
+ #: (untyped, Array[Symbol]) -> void
+ private def register_events(listener, events) # :nodoc:
+ events.each { |event| (listeners[event] ||= []) << listener }
+ end
+
+ # Walks `root` dispatching events to all registered listeners.
+ alias dispatch visit
+
+ # Dispatches a single event for `node` to all registered listeners.
+ #--
+ #: (node node) -> void
+ def dispatch_once(node)
+ node.accept(DispatchOnce.new(listeners))
+ end
+ <%- nodes.each do |node| -%>
+
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node) # :nodoc:
+ listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
+ super
+ listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
+ end
+ <%- end -%>
+
+ class DispatchOnce < Visitor # :nodoc:
+ attr_reader :listeners #: Hash[Symbol, Array[untyped]]
+
+ #: (Hash[Symbol, Array[untyped]] listeners) -> void
+ def initialize(listeners)
+ @listeners = listeners
+ end
+ <%- nodes.each do |node| -%>
+
+ # Dispatch enter and leave events for <%= node.name %> nodes.
+ #--
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node)
+ listeners[:on_<%= node.human %>_enter]&.each { |listener| listener.on_<%= node.human %>_enter(node) }
+ listeners[:on_<%= node.human %>_leave]&.each { |listener| listener.on_<%= node.human %>_leave(node) }
+ end
+ <%- end -%>
+ end
+
+ private_constant :DispatchOnce
+ end
+end
diff --git a/prism/templates/lib/prism/dot_visitor.rb.erb b/prism/templates/lib/prism/dot_visitor.rb.erb
new file mode 100644
index 0000000000..88ef1e1f36
--- /dev/null
+++ b/prism/templates/lib/prism/dot_visitor.rb.erb
@@ -0,0 +1,215 @@
+#--
+# rbs_inline: enabled
+
+require "cgi/escape"
+require "cgi/util" unless defined?(CGI::EscapeExt)
+
+module Prism
+ # This visitor provides the ability to call Node#to_dot, which converts a
+ # subtree into a graphviz dot graph.
+ class DotVisitor < Visitor
+ class Field # :nodoc:
+ attr_reader :name #: String
+ attr_reader :value #: String?
+ attr_reader :port #: bool
+
+ #: (String name, String? value, bool port) -> void
+ def initialize(name, value, port)
+ @name = name
+ @value = value
+ @port = port
+ end
+
+ #: () -> String
+ def to_dot
+ if port
+ "<tr><td align=\"left\" colspan=\"2\" port=\"#{name}\">#{name}</td></tr>"
+ else
+ "<tr><td align=\"left\">#{name}</td><td>#{CGI.escapeHTML(value || raise)}</td></tr>"
+ end
+ end
+ end
+
+ class Table # :nodoc:
+ attr_reader :name #: String
+ attr_reader :fields #: Array[Field]
+
+ #: (String name) -> void
+ def initialize(name)
+ @name = name
+ @fields = []
+ end
+
+ #: (String name, ?String? value, ?port: bool) -> void
+ def field(name, value = nil, port: false)
+ fields << Field.new(name, value, port)
+ end
+
+ #: () -> String
+ def to_dot
+ dot = <<~DOT
+ <table border="0" cellborder="1" cellspacing="0" cellpadding="4">
+ <tr><td colspan="2"><b>#{name}</b></td></tr>
+ DOT
+
+ if fields.any?
+ "#{dot} #{fields.map(&:to_dot).join("\n ")}\n</table>"
+ else
+ "#{dot}</table>"
+ end
+ end
+ end
+
+ class Digraph # :nodoc:
+ attr_reader :nodes, :waypoints, :edges #: Array[String]
+
+ #: () -> void
+ def initialize
+ @nodes = []
+ @waypoints = []
+ @edges = []
+ end
+
+ #: (String value) -> void
+ def node(value)
+ nodes << value
+ end
+
+ #: (String value) -> void
+ def waypoint(value)
+ waypoints << value
+ end
+
+ #: (String value) -> void
+ def edge(value)
+ edges << value
+ end
+
+ #: () -> String
+ def to_dot
+ <<~DOT
+ digraph "Prism" {
+ node [
+ fontname=\"Courier New\"
+ shape=plain
+ style=filled
+ fillcolor=gray95
+ ];
+
+ #{nodes.map { |node| node.gsub(/\n/, "\n ") }.join("\n ")}
+ node [shape=point];
+ #{waypoints.join("\n ")}
+
+ #{edges.join("\n ")}
+ }
+ DOT
+ end
+ end
+
+ private_constant :Field, :Table, :Digraph
+
+ # The digraph that is being built.
+ attr_reader :digraph #: Digraph
+
+ # Initialize a new dot visitor.
+ #--
+ #: () -> void
+ def initialize
+ @digraph = Digraph.new
+ end
+
+ # Convert this visitor into a graphviz dot graph string.
+ #--
+ #: () -> String
+ def to_dot
+ digraph.to_dot
+ end
+ <%- nodes.each do |node| -%>
+
+ #: (<%= node.name %>) -> void
+ def visit_<%= node.human %>(node) # :nodoc:
+ table = Table.new("<%= node.name %>")
+ id = node_id(node)
+ <%- if (node_flags = node.flags) -%>
+
+ # flags
+ table.field("flags", <%= node_flags.human %>_inspect(node))
+ <%- end -%>
+ <%- node.fields.each do |field| -%>
+
+ # <%= field.name %>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ table.field("<%= field.name %>", port: true)
+ digraph.edge("#{id}:<%= field.name %> -> #{node_id(node.<%= field.name %>)};")
+ <%- when Prism::Template::OptionalNodeField -%>
+ unless (<%= field.name %> = node.<%= field.name %>).nil?
+ table.field("<%= field.name %>", port: true)
+ digraph.edge("#{id}:<%= field.name %> -> #{node_id(<%= field.name %>)};")
+ end
+ <%- when Prism::Template::NodeListField -%>
+ if node.<%= field.name %>.any?
+ table.field("<%= field.name %>", port: true)
+
+ waypoint = "#{id}_<%= field.name %>"
+ digraph.waypoint("#{waypoint};")
+
+ digraph.edge("#{id}:<%= field.name %> -> #{waypoint};")
+ node.<%= field.name %>.each { |child| digraph.edge("#{waypoint} -> #{node_id(child)};") }
+ else
+ table.field("<%= field.name %>", "[]")
+ end
+ <%- when Prism::Template::StringField, Prism::Template::ConstantField, Prism::Template::OptionalConstantField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::ConstantListField, Prism::Template::IntegerField, Prism::Template::DoubleField -%>
+ table.field("<%= field.name %>", node.<%= field.name %>.inspect)
+ <%- when Prism::Template::LocationField -%>
+ table.field("<%= field.name %>", location_inspect(node.<%= field.name %>))
+ <%- when Prism::Template::OptionalLocationField -%>
+ unless (<%= field.name %> = node.<%= field.name %>).nil?
+ table.field("<%= field.name %>", location_inspect(<%= field.name %>))
+ end
+ <%- else -%>
+ <%- raise -%>
+ <%- end -%>
+ <%- end -%>
+
+ digraph.node(<<~DOT)
+ #{id} [
+ label=<#{table.to_dot.gsub(/\n/, "\n ")}>
+ ];
+ DOT
+
+ super
+ end
+ <%- end -%>
+
+ private
+
+ # Generate a unique node ID for a node throughout the digraph.
+ #--
+ #: (node) -> String
+ def node_id(node) # :nodoc:
+ "Node_#{node.object_id}"
+ end
+
+ # Inspect a location to display the start and end line and columns in bytes.
+ #--
+ #: (Location) -> String
+ def location_inspect(location) # :nodoc:
+ "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})"
+ end
+ <%- flags.each do |flag| -%>
+
+ # Inspect a node that has <%= flag.human %> flags to display the flags as a
+ # comma-separated list.
+ #--
+ #: (<%= nodes.filter_map { |node| node.name if node.flags == flag }.join(" | ") %> node) -> String
+ def <%= flag.human %>_inspect(node) # :nodoc:
+ flags = [] #: Array[String]
+ <%- flag.values.each do |value| -%>
+ flags << "<%= value.name.downcase %>" if node.<%= value.name.downcase %>?
+ <%- end -%>
+ flags.join(", ")
+ end
+ <%- end -%>
+ end
+end
diff --git a/prism/templates/lib/prism/dsl.rb.erb b/prism/templates/lib/prism/dsl.rb.erb
new file mode 100644
index 0000000000..be7dc6d9c1
--- /dev/null
+++ b/prism/templates/lib/prism/dsl.rb.erb
@@ -0,0 +1,172 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # The DSL module provides a set of methods that can be used to create prism
+ # nodes in a more concise manner. For example, instead of writing:
+ #
+ # source = Prism::Source.for("[1]", 1, [0])
+ #
+ # Prism::ArrayNode.new(
+ # source,
+ # 0,
+ # Prism::Location.new(source, 0, 3),
+ # 0,
+ # [
+ # Prism::IntegerNode.new(
+ # source,
+ # 0,
+ # Prism::Location.new(source, 1, 1),
+ # Prism::IntegerBaseFlags::DECIMAL,
+ # 1
+ # )
+ # ],
+ # Prism::Location.new(source, 0, 1),
+ # Prism::Location.new(source, 2, 1)
+ # )
+ #
+ # you could instead write:
+ #
+ # class Builder
+ # include Prism::DSL
+ #
+ # attr_reader :default_source
+ #
+ # def initialize
+ # @default_source = source("[1]")
+ # end
+ #
+ # def build
+ # array_node(
+ # location: location(start_offset: 0, length: 3),
+ # elements: [
+ # integer_node(
+ # location: location(start_offset: 1, length: 1),
+ # flags: integer_base_flag(:decimal),
+ # value: 1
+ # )
+ # ],
+ # opening_loc: location(start_offset: 0, length: 1),
+ # closing_loc: location(start_offset: 2, length: 1)
+ # )
+ # end
+ # end
+ #
+ # This is mostly helpful in the context of generating trees programmatically.
+ module DSL
+ # Provide all of these methods as module methods as well, to allow for
+ # building nodes like Prism::DSL.nil_node.
+ extend self
+
+ # Create a new Source object.
+ #--
+ #: (String string) -> Source
+ def source(string)
+ Source.for(string, 1, build_offsets(string))
+ end
+
+ # Create a new Location object.
+ #--
+ #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location
+ def location(source: default_source, start_offset: 0, length: 0)
+ Location.new(source, start_offset, length)
+ end
+ <%- nodes.each do |node| -%>
+ <%-
+ params = [
+ ["source", "Source"],
+ ["node_id", "Integer"],
+ ["location", "Location"],
+ ["flags", "Integer"]
+ ].concat(node.fields.map { |field| [field.name, field.rbs_class] })
+ -%>
+
+ # Create a new <%= node.name %> node.
+ #--
+ #: (<%= params.map { |(name, type)| "?#{name}: #{type}" }.join(", ") %>) -> <%= node.name %>
+ def <%= node.human %>(<%= ["source: default_source", "node_id: 0", "location: default_location", "flags: 0", *node.fields.map { |field|
+ case field
+ when Prism::Template::NodeField
+ kind = field.specific_kind || field.union_kind&.first
+ if kind.nil?
+ "#{field.name}: default_node(source, location)"
+ else
+ "#{field.name}: #{kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}(source: source)"
+ end
+ when Prism::Template::ConstantField
+ "#{field.name}: :\"\""
+ when Prism::Template::OptionalNodeField, Prism::Template::OptionalConstantField, Prism::Template::OptionalLocationField
+ "#{field.name}: nil"
+ when Prism::Template::NodeListField, Prism::Template::ConstantListField
+ "#{field.name}: []"
+ when Prism::Template::StringField
+ "#{field.name}: \"\""
+ when Prism::Template::LocationField
+ "#{field.name}: location"
+ when Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField
+ "#{field.name}: 0"
+ when Prism::Template::DoubleField
+ "#{field.name}: 0.0"
+ else
+ raise
+ end
+ }].join(", ") %>)
+ <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+ end
+ <%- end -%>
+ <%- flags.each do |flag| -%>
+
+ # Retrieve the value of one of the <%= flag.name %> flags.
+ #--
+ #: (Symbol name) -> Integer
+ def <%= flag.human.chomp("s") %>(name)
+ case name
+ <%- flag.values.each do |value| -%>
+ when :<%= value.name.downcase %> then <%= flag.name %>::<%= value.name %>
+ <%- end -%>
+ else Kernel.raise ArgumentError, "invalid <%= flag.name %> flag: #{name.inspect}"
+ end
+ end
+ <%- end -%>
+
+ private
+
+ # The default source object that gets attached to nodes and locations if no
+ # source is specified.
+ #--
+ #: () -> Source
+ def default_source
+ Source.for("", 1, [0])
+ end
+
+ # The default location object that gets attached to nodes if no location is
+ # specified, which uses the given source.
+ #--
+ #: () -> Location
+ def default_location
+ Location.new(default_source, 0, 0)
+ end
+
+ # The default node that gets attached to nodes if no node is specified for a
+ # required node field.
+ #--
+ #: (Source source, Location location) -> node
+ def default_node(source, location)
+ ErrorRecoveryNode.new(source, -1, location, 0, nil)
+ end
+
+ private
+
+ # Build the newline byte offset array for the given source string.
+ #--
+ #: (String source) -> Array[Integer]
+ def build_offsets(source)
+ offsets = [0]
+ start = 0
+ while (index = source.byteindex("\n", start))
+ offsets << (start = index + 1)
+ end
+ offsets
+ end
+ end
+end
diff --git a/prism/templates/lib/prism/inspect_visitor.rb.erb b/prism/templates/lib/prism/inspect_visitor.rb.erb
new file mode 100644
index 0000000000..820f5ae75f
--- /dev/null
+++ b/prism/templates/lib/prism/inspect_visitor.rb.erb
@@ -0,0 +1,147 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # This visitor is responsible for composing the strings that get returned by
+ # the various #inspect methods defined on each of the nodes.
+ class InspectVisitor < Visitor
+ # Most of the time, we can simply pass down the indent to the next node.
+ # However, when we are inside a list we want some extra special formatting
+ # when we hit an element in that list. In this case, we have a special
+ # command that replaces the subsequent indent with the given value.
+ class Replace # :nodoc:
+ attr_reader :value #: String
+
+ #: (String value) -> void
+ def initialize(value)
+ @value = value
+ end
+ end
+
+ private_constant :Replace
+
+ # The current prefix string.
+ # :stopdoc:
+ attr_reader :indent #: String
+ # :startdoc:
+
+ # The list of commands that we need to execute in order to compose the
+ # final string.
+ #: stopdoc:
+ attr_reader :commands #: Array[[String | node | Replace, String]]
+ # :startdoc:
+
+ #: (?String indent) -> void
+ def initialize(indent = +"") # :nodoc:
+ @indent = indent
+ @commands = []
+ end
+
+ # Compose an inspect string for the given node.
+ #--
+ #: (node node) -> String
+ def self.compose(node)
+ visitor = new
+ node.accept(visitor)
+ visitor.compose
+ end
+
+ # Compose the final string.
+ #--
+ #: () -> String
+ def compose # :nodoc:
+ buffer = +""
+ replace = nil
+
+ until commands.empty?
+ # @type var command: String | node | Replace
+ # @type var indent: String
+ command, indent = *commands.shift
+
+ case command
+ when String
+ buffer << (replace || indent)
+ buffer << command
+ replace = nil
+ when Node
+ visitor = InspectVisitor.new(indent)
+ command.accept(visitor)
+ @commands = [*visitor.commands, *@commands]
+ when Replace
+ replace = command.value
+ else
+ raise "Unknown command: #{command.inspect}"
+ end
+ end
+
+ buffer
+ end
+ <%- nodes.each do |node| -%>
+
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node) # :nodoc:
+ commands << [inspect_node(<%= node.name.inspect %>, node), indent]
+ <%- (fields = [node.flags || Prism::Template::Flags.empty, *node.fields]).each_with_index do |field, index| -%>
+ <%- pointer = index == fields.length - 1 ? "└── " : "├── " -%>
+ <%- preadd = index == fields.length - 1 ? " " : "│ " -%>
+ <%- case field -%>
+ <%- when Prism::Template::Flags -%>
+ flags = [("newline" if node.newline?), ("static_literal" if node.static_literal?), <%= field.values.map { |value| "(\"#{value.name.downcase}\" if node.#{value.name.downcase}?)" }.join(", ") %>].compact
+ commands << ["<%= pointer %>flags: #{flags.empty? ? "∅" : flags.join(", ")}\n", indent]
+ <%- when Prism::Template::NodeListField -%>
+ commands << ["<%= pointer %><%= field.name %>: (length: #{(<%= field.name %> = node.<%= field.name %>).length})\n", indent]
+ if <%= field.name %>.any?
+ <%= field.name %>[0...-1].each do |child|
+ commands << [Replace.new("#{indent}<%= preadd %>├── "), indent]
+ commands << [child, "#{indent}<%= preadd %>│ "]
+ end
+ commands << [Replace.new("#{indent}<%= preadd %>└── "), indent]
+ commands << [<%= field.name %>[-1], "#{indent}<%= preadd %> "]
+ end
+ <%- when Prism::Template::NodeField -%>
+ commands << ["<%= pointer %><%= field.name %>:\n", indent]
+ commands << [node.<%= field.name %>, "#{indent}<%= preadd %>"]
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (<%= field.name %> = node.<%= field.name %>).nil?
+ commands << ["<%= pointer %><%= field.name %>: ∅\n", indent]
+ else
+ commands << ["<%= pointer %><%= field.name %>:\n", indent]
+ commands << [<%= field.name %>, "#{indent}<%= preadd %>"]
+ end
+ <%- when Prism::Template::ConstantField, Prism::Template::ConstantListField, Prism::Template::StringField, Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField, Prism::Template::DoubleField -%>
+ commands << ["<%= pointer %><%= field.name %>: #{node.<%= field.name %>.inspect}\n", indent]
+ <%- when Prism::Template::OptionalConstantField -%>
+ if (<%= field.name %> = node.<%= field.name %>).nil?
+ commands << ["<%= pointer %><%= field.name %>: ∅\n", indent]
+ else
+ commands << ["<%= pointer %><%= field.name %>: #{<%= field.name %>.inspect}\n", indent]
+ end
+ <%- when Prism::Template::LocationField, Prism::Template::OptionalLocationField -%>
+ commands << ["<%= pointer %><%= field.name %>: #{inspect_location(node.<%= field.name %>)}\n", indent]
+ <%- end -%>
+ <%- end -%>
+ end
+ <%- end -%>
+
+ private
+
+ # Compose a header for the given node.
+ #--
+ #: (String name, node node) -> String
+ def inspect_node(name, node) # :nodoc:
+ location = node.location
+ "@ #{name} (location: (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}))\n"
+ end
+
+ # Compose a string representing the given inner location field.
+ #--
+ #: (Location? location) -> String
+ def inspect_location(location) # :nodoc:
+ if location
+ "(#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column}) = #{location.slice.inspect}"
+ else
+ "∅"
+ end
+ end
+ end
+end
diff --git a/prism/templates/lib/prism/mutation_compiler.rb.erb b/prism/templates/lib/prism/mutation_compiler.rb.erb
new file mode 100644
index 0000000000..2d555048d2
--- /dev/null
+++ b/prism/templates/lib/prism/mutation_compiler.rb.erb
@@ -0,0 +1,22 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # This visitor walks through the tree and copies each node as it is being
+ # visited. This is useful for consumers that want to mutate the tree, as you
+ # can change subtrees in place without effecting the rest of the tree.
+ class MutationCompiler < Compiler
+ <%- nodes.each_with_index do |node, index| -%>
+<%= "\n" if index != 0 -%>
+ #: (<%= node.name %>) -> node?
+ def visit_<%= node.human %>(node) # :nodoc:
+ <%- fields = node.fields.select { |field| [Prism::Template::NodeField, Prism::Template::OptionalNodeField, Prism::Template::NodeListField].include?(field.class) } -%>
+ <%- if fields.any? -%>
+ node.copy(<%= fields.map { |field| "#{field.name}: #{field.is_a?(Prism::Template::NodeListField) ? "visit_all" : "visit"}(node.#{field.name})" }.join(", ") %>)
+ <%- else -%>
+ node.copy
+ <%- end -%>
+ end
+ <%- end -%>
+ end
+end
diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb
new file mode 100644
index 0000000000..fb13051aba
--- /dev/null
+++ b/prism/templates/lib/prism/node.rb.erb
@@ -0,0 +1,748 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # @rbs!
+ # interface _Repository
+ # def enter: (Integer node_id, Symbol field_name) -> Relocation::Entry
+ # end
+ #
+ # interface _Node
+ # def deconstruct: () -> Array[Prism::node?]
+ # def inspect: () -> String
+ # end
+ #
+ # type node = Node & _Node
+
+ # This represents a node in the tree. It is the parent class of all of the
+ # various node types.
+ class Node
+ # A pointer to the source that this node was created from.
+ # :stopdoc:
+ attr_reader :source #: Source
+ private :source
+ # :startdoc:
+
+ # A unique identifier for this node. This is used in a very specific
+ # use case where you want to keep around a reference to a node without
+ # having to keep around the syntax tree in memory. This unique identifier
+ # will be consistent across multiple parses of the same source code.
+ attr_reader :node_id #: Integer
+
+ # The location associated with this node. For lazily loading Location
+ # objects, we keep it as a packed integer until it is accessed.
+ # @rbs @location: Location | Integer
+
+ # Save this node using a saved source so that it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry
+ def save(repository)
+ repository.enter(node_id, :itself)
+ end
+
+ # A Location instance that represents the location of this node in the
+ # source.
+ #--
+ #: () -> Location
+ def location
+ location = @location
+ return location if location.is_a?(Location)
+ @location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+ end
+
+ # Save the location using a saved source so that it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry
+ def save_location(repository)
+ repository.enter(node_id, :location)
+ end
+
+ # --------------------------------------------------------------------------
+ # :section: Location Delegators
+ # These methods provide convenient access to the underlying Location object.
+ # --------------------------------------------------------------------------
+
+ # Delegates to [`start_line`](rdoc-ref:Location#start_line) of the associated location object.
+ #--
+ #: () -> Integer
+ def start_line
+ location.start_line
+ end
+
+ # Delegates to [`end_line`](rdoc-ref:Location#end_line) of the associated location object.
+ #--
+ #: () -> Integer
+ def end_line
+ location.end_line
+ end
+
+ # Delegates to [`start_offset`](rdoc-ref:Location#start_offset) of the associated location object.
+ #--
+ #: () -> Integer
+ def start_offset
+ location = @location
+ location.is_a?(Location) ? location.start_offset : location >> 32
+ end
+
+ # Delegates to [`end_offset`](rdoc-ref:Location#end_offset) of the associated location object.
+ #--
+ #: () -> Integer
+ def end_offset
+ location = @location
+ location.is_a?(Location) ? location.end_offset : ((location >> 32) + (location & 0xFFFFFFFF))
+ end
+
+ # Delegates to [`start_character_offset`](rdoc-ref:Location#start_character_offset)
+ # of the associated location object.
+ #--
+ #: () -> Integer
+ def start_character_offset
+ location.start_character_offset
+ end
+
+ # Delegates to [`end_character_offset`](rdoc-ref:Location#end_character_offset)
+ # of the associated location object.
+ #--
+ #: () -> Integer
+ def end_character_offset
+ location.end_character_offset
+ end
+
+ # Delegates to [`cached_start_code_units_offset`](rdoc-ref:Location#cached_start_code_units_offset)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_start_code_units_offset(cache)
+ location.cached_start_code_units_offset(cache)
+ end
+
+ # Delegates to [`cached_end_code_units_offset`](rdoc-ref:Location#cached_end_code_units_offset)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_end_code_units_offset(cache)
+ location.cached_end_code_units_offset(cache)
+ end
+
+ # Delegates to [`start_column`](rdoc-ref:Location#start_column) of the associated location object.
+ #--
+ #: () -> Integer
+ def start_column
+ location.start_column
+ end
+
+ # Delegates to [`end_column`](rdoc-ref:Location#end_column) of the associated location object.
+ #--
+ #: () -> Integer
+ def end_column
+ location.end_column
+ end
+
+ # Delegates to [`start_character_column`](rdoc-ref:Location#start_character_column)
+ # of the associated location object.
+ #--
+ #: () -> Integer
+ def start_character_column
+ location.start_character_column
+ end
+
+ # Delegates to [`end_character_column`](rdoc-ref:Location#end_character_column)
+ # of the associated location object.
+ #--
+ #: () -> Integer
+ def end_character_column
+ location.end_character_column
+ end
+
+ # Delegates to [`cached_start_code_units_column`](rdoc-ref:Location#cached_start_code_units_column)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_start_code_units_column(cache)
+ location.cached_start_code_units_column(cache)
+ end
+
+ # Delegates to [`cached_end_code_units_column`](rdoc-ref:Location#cached_end_code_units_column)
+ # of the associated location object.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_end_code_units_column(cache)
+ location.cached_end_code_units_column(cache)
+ end
+
+ # Delegates to [`leading_comments`](rdoc-ref:Location#leading_comments) of the associated location object.
+ #--
+ #: () -> Array[Comment]
+ def leading_comments
+ location.leading_comments
+ end
+
+ # Delegates to [`trailing_comments`](rdoc-ref:Location#trailing_comments) of the associated location object.
+ #--
+ #: () -> Array[Comment]
+ def trailing_comments
+ location.trailing_comments
+ end
+
+ # Delegates to [`comments`](rdoc-ref:Location#comments) of the associated location object.
+ #--
+ #: () -> Array[Comment]
+ def comments
+ location.comments
+ end
+
+ # :section:
+
+ # Returns all of the lines of the source code associated with this node.
+ #--
+ #: () -> Array[String]
+ def source_lines
+ location.source_lines
+ end
+
+ # An alias for source_lines, used to mimic the API from
+ # RubyVM::AbstractSyntaxTree to make it easier to migrate.
+ alias script_lines source_lines
+
+ # Slice the location of the node from the source.
+ #--
+ #: () -> String
+ def slice
+ location.slice
+ end
+
+ # Slice the location of the node from the source, starting at the beginning
+ # of the line that the location starts on, ending at the end of the line
+ # that the location ends on.
+ #--
+ #: () -> String
+ def slice_lines
+ location.slice_lines
+ end
+
+ # An bitset of flags for this node. There are certain flags that are common
+ # for all nodes, and then some nodes have specific flags.
+ # :stopdoc:
+ attr_reader :flags #: Integer
+ protected :flags
+ # :startdoc:
+
+ # Returns true if the node has the newline flag set.
+ #--
+ #: () -> bool
+ def newline?
+ flags.anybits?(NodeFlags::NEWLINE)
+ end
+
+ # Returns true if the node has the static literal flag set.
+ #--
+ #: () -> bool
+ def static_literal?
+ flags.anybits?(NodeFlags::STATIC_LITERAL)
+ end
+
+ # Similar to inspect, but respects the current level of indentation given by
+ # the pretty print object.
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
+ q.seplist(inspect.chomp.each_line, -> { q.breakable }) do |line|
+ q.text(line.chomp)
+ end
+ q.current_group.break
+ end
+
+ # Convert this node into a graphviz dot graph string.
+ #--
+ #: () -> String
+ def to_dot
+ # @type self: node
+ DotVisitor.new.tap { |visitor| accept(visitor) }.to_dot
+ end
+
+ # Returns a list of nodes that are descendants of this node that contain the
+ # given line and column. This is useful for locating a node that is selected
+ # based on the line and column of the source code.
+ #
+ # Important to note is that the column given to this method should be in
+ # bytes, as opposed to characters or code units.
+ #--
+ #: (Integer line, Integer column) -> Array[node]
+ def tunnel(line, column)
+ queue = [self] #: Array[node]
+ result = [] #: Array[node]
+ offset = source.byte_offset(line, column)
+
+ while (node = queue.shift)
+ result << node
+
+ node.each_child_node do |child_node|
+ if child_node.start_offset <= offset && offset < child_node.end_offset
+ queue << child_node
+ break
+ end
+ end
+ end
+
+ result
+ end
+
+ # Returns the first node that matches the given block when visited in a
+ # breadth-first search. This is useful for finding a node that matches a
+ # particular condition.
+ #
+ # node.breadth_first_search { |node| node.node_id == node_id }
+ #--
+ #: () { (node) -> bool } -> node?
+ def breadth_first_search(&blk)
+ queue = [self] #: Array[node]
+
+ while (node = queue.shift)
+ return node if yield node
+ queue.concat(node.compact_child_nodes)
+ end
+
+ nil
+ end
+ alias find breadth_first_search
+
+ # Returns all of the nodes that match the given block when visited in a
+ # breadth-first search. This is useful for finding all nodes that match a
+ # particular condition.
+ #
+ # node.breadth_first_search_all { |node| node.is_a?(Prism::CallNode) }
+ #--
+ #: () { (node) -> bool } -> Array[node]
+ def breadth_first_search_all(&blk)
+ queue = [self] #: Array[Prism::node]
+ results = [] #: Array[Prism::node]
+
+ while (node = queue.shift)
+ results << node if yield node
+ queue.concat(node.compact_child_nodes)
+ end
+
+ results
+ end
+ alias find_all breadth_first_search_all
+
+ # Returns a list of the fields that exist for this node class. Fields
+ # describe the structure of the node. This kind of reflection is useful for
+ # things like recursively visiting each node _and_ field in the tree.
+ #--
+ #: () -> Array[Reflection::Field]
+ def self.fields
+ # This method should only be called on subclasses of Node, not Node
+ # itself.
+ raise NoMethodError, "undefined method `fields' for #{inspect}" if self == Node
+
+ Reflection.fields_for(self)
+ end
+
+ # --------------------------------------------------------------------------
+ # :section: Node Interface
+ # These methods are effectively abstract methods that are implemented by
+ # the various subclasses of Node.
+ # --------------------------------------------------------------------------
+
+ # Accepts a visitor and calls back into the specialized visit function.
+ #--
+ #: (_Visitor visitor) -> untyped
+ def accept(visitor)
+ raise NoMethodError, "undefined method `accept' for #{inspect}"
+ end
+
+ # Returns an array of child nodes, including `nil`s in the place of optional
+ # nodes that were not present.
+ #--
+ #: () -> Array[node?]
+ def child_nodes
+ raise NoMethodError, "undefined method `child_nodes' for #{inspect}"
+ end
+
+ alias deconstruct child_nodes
+
+ # With a block given, yields each child node. Without a block, returns
+ # an enumerator that contains each child node. Excludes any `nil`s in
+ # the place of optional nodes that were not present.
+ #--
+ #: () -> Enumerator[node, void]
+ #: () { (node) -> void } -> void
+ def each_child_node(&blk)
+ raise NoMethodError, "undefined method `each_child_node' for #{inspect}"
+ end
+
+ # Returns an array of child nodes, excluding any `nil`s in the place of
+ # optional nodes that were not present.
+ #--
+ #: () -> Array[node]
+ def compact_child_nodes
+ raise NoMethodError, "undefined method `compact_child_nodes' for #{inspect}"
+ end
+
+ # Returns an array of child nodes and locations that could potentially have
+ # comments attached to them.
+ #--
+ #: () -> Array[node | Location]
+ def comment_targets
+ raise NoMethodError, "undefined method `comment_targets' for #{inspect}"
+ end
+
+ # Returns a string representation of the node.
+ #--
+ #: () -> String
+ def inspect
+ raise NoMethodError, "undefined method `inspect' for #{inspect}"
+ end
+
+ # Sometimes you want to check an instance of a node against a list of
+ # classes to see what kind of behavior to perform. Usually this is done by
+ # calling `[cls1, cls2].include?(node.class)` or putting the node into a
+ # case statement and doing `case node; when cls1; when cls2; end`. Both of
+ # these approaches are relatively slow because of the constant lookups,
+ # method calls, and/or array allocations.
+ #
+ # Instead, you can call #type, which will return to you a symbol that you
+ # can use for comparison. This is faster than the other approaches because
+ # it uses a single integer comparison, but also because if you're on CRuby
+ # you can take advantage of the fact that case statements with all symbol
+ # keys will use a jump table.
+ #--
+ #: () -> Symbol
+ def type
+ raise NoMethodError, "undefined method `type' for #{inspect}"
+ end
+
+ # Similar to #type, this method returns a symbol that you can use for
+ # splitting on the type of the node without having to do a long === chain.
+ # Note that like #type, it will still be slower than using == for a single
+ # class, but should be faster in a case statement or an array comparison.
+ #--
+ #: () -> Symbol
+ def self.type
+ raise NoMethodError, "undefined method `type' for #{inspect}"
+ end
+ end
+ <%- nodes.each do |node| -%>
+
+ <%- node.each_comment_line do |line| -%>
+ #<%= line %>
+ <%- end -%>
+ class <%= node.name -%> < Node
+ <%- node.fields.each do |field| -%>
+ # @rbs @<%= field.name %>: <%= field.rbs_class %>
+ <%- end -%>
+
+ # Initialize a new <%= node.name %> node.
+ #--
+ #: (Source source, Integer node_id, Location location, Integer flags, <%= node.fields.map { |field| "#{field.rbs_class} #{field.name}" }.join(", ") %>) -> void
+ def initialize(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+ @source = source
+ @node_id = node_id
+ @location = location
+ @flags = flags
+ <%- node.fields.each do |field| -%>
+ <%- if Prism::Template::CHECK_FIELD_KIND && field.respond_to?(:check_field_kind) -%>
+ raise "<%= node.name %>#<%= field.name %> was of unexpected type:\n#{<%= field.name %>.inspect}" unless <%= field.check_field_kind %>
+ <%- end -%>
+ @<%= field.name %> = <%= field.name %>
+ <%- end -%>
+ end
+
+ # ---------
+ # :section: Repository
+ # Methods related to Relocation.
+ # ---------
+
+ # ----------------------------------------------------------------------------------
+ # :section: Node Interface
+ # These methods are present on all subclasses of Node.
+ # Read the [node interface docs](Node.html#node-interface) for more information.
+ # ----------------------------------------------------------------------------------
+
+ # See Node.accept.
+ #--
+ #: (_Visitor visitor) -> untyped
+ def accept(visitor)
+ visitor.visit_<%= node.human %>(self)
+ end
+
+ # See Node.child_nodes.
+ #--
+ #: () -> Array[node?]
+ def child_nodes
+ [<%= node.fields.map { |field|
+ case field
+ when Prism::Template::NodeField, Prism::Template::OptionalNodeField then field.name
+ when Prism::Template::NodeListField then "*#{field.name}"
+ end
+ }.compact.join(", ") %>]
+ end
+
+ # See Node.each_child_node.
+ #--
+ #: () -> Enumerator[node, void]
+ #: () { (node) -> void } -> void
+ def each_child_node(&blk)
+ return to_enum(:each_child_node) unless block_given?
+
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ yield <%= field.name %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (<%= field.name %> = self.<%= field.name %>); yield <%= field.name %>; end
+ <%- when Prism::Template::NodeListField -%>
+ <%= field.name %>.each { |node| yield node }
+ <%- end -%>
+ <%- end -%>
+ end
+
+ # See Node.compact_child_nodes.
+ #--
+ #: () -> Array[node]
+ def compact_child_nodes
+ <%- if node.fields.any? { |field| field.is_a?(Prism::Template::OptionalNodeField) } -%>
+ compact = [] #: Array[Prism::node]
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ compact << <%= field.name %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (<%= field.name %> = self.<%= field.name %>); compact << <%= field.name %>; end
+ <%- when Prism::Template::NodeListField -%>
+ compact.concat(<%= field.name %>)
+ <%- end -%>
+ <%- end -%>
+ compact
+ <%- else -%>
+ [<%= node.fields.map { |field|
+ case field
+ when Prism::Template::NodeField then field.name
+ when Prism::Template::NodeListField then "*#{field.name}"
+ end
+ }.compact.join(", ") %>]
+ <%- end -%>
+ end
+
+ # See Node.comment_targets.
+ #--
+ #: () -> Array[node | Location]
+ def comment_targets
+ [<%= node.fields.map { |field|
+ case field
+ when Prism::Template::NodeField, Prism::Template::LocationField then field.name
+ when Prism::Template::OptionalNodeField, Prism::Template::NodeListField, Prism::Template::OptionalLocationField then "*#{field.name}"
+ end
+ }.compact.join(", ") %>] #: Array[Prism::node | Location]
+ end
+
+ # :call-seq:
+ # copy(**fields) -> <%= node.name %>
+ #
+ # Creates a copy of self with the given fields, using self as the template.
+ #--
+ #: (?node_id: Integer, ?location: Location, ?flags: Integer, <%= node.fields.map { |field| "?#{field.name}: #{field.rbs_class}" }.join(", ") %>) -> <%= node.name %>
+ def copy(<%= (["node_id", "location", "flags"] + node.fields.map(&:name)).map { |field| "#{field}: self.#{field}" }.join(", ") %>)
+ <%= node.name %>.new(<%= ["source", "node_id", "location", "flags", *node.fields.map(&:name)].join(", ") %>)
+ end
+
+ alias deconstruct child_nodes
+
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
+ { <%= (["node_id: node_id", "location: location"] + node.fields.map { |field| "#{field.name}: #{field.name}" }).join(", ") %> }
+ end
+
+ # See `Node#type`.
+ #--
+ #: () -> :<%= node.human %>
+ def type
+ :<%= node.human %>
+ end
+
+ # See `Node.type`.
+ #--
+ #: () -> :<%= node.human %>
+ def self.type
+ :<%= node.human %>
+ end
+
+ #: () -> String
+ def inspect # :nodoc:
+ InspectVisitor.compose(self)
+ end
+
+ # :section:
+
+ <%- if (node_flags = node.flags) -%>
+ <%- node_flags.values.each do |value| -%>
+ # :category: Flags
+ # <%= value.comment %>
+ #--
+ #: () -> bool
+ def <%= value.name.downcase %>?
+ flags.anybits?(<%= node_flags.name %>::<%= value.name %>)
+ end
+
+ <%- end -%>
+ <%- end -%>
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::LocationField -%>
+ # :category: Locations
+ # :call-seq:
+ # <%= field.name %> -> <%= field.call_seq_type %>
+ #
+ <%- if field.comment.nil? -%>
+ # Returns the Location represented by `<%= field.name %>`.
+ <%- else -%>
+ <%- field.each_comment_line do |line| -%>
+ #<%= line %>
+ <%- end -%>
+ <%- end -%>
+ #--
+ #: () -> Location
+ def <%= field.name %>
+ location = @<%= field.name %>
+ return location if location.is_a?(Location)
+ @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+ end
+
+ # :category: Repository
+ # Save the <%= field.name %> location using the given saved source so that
+ # it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry
+ def save_<%= field.name %>(repository)
+ repository.enter(node_id, :<%= field.name %>)
+ end
+
+ <%- when Prism::Template::OptionalLocationField -%>
+ # :category: Locations
+ # :call-seq:
+ # <%= field.name %> -> <%= field.call_seq_type %>
+ #
+ <%- if field.comment.nil? -%>
+ # Returns the Location represented by `<%= field.name %>`.
+ <%- else -%>
+ <%- field.each_comment_line do |line| -%>
+ #<%= line %>
+ <%- end -%>
+ <%- end -%>
+ #--
+ #: () -> Location?
+ def <%= field.name %>
+ location = @<%= field.name %>
+ case location
+ when nil
+ nil
+ when Location
+ location
+ else
+ @<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
+ end
+ end
+
+ # :category: Repository
+ # Save the <%= field.name %> location using the given saved source so that
+ # it can be retrieved later.
+ #--
+ #: (_Repository repository) -> Relocation::Entry?
+ def save_<%= field.name %>(repository)
+ repository.enter(node_id, :<%= field.name %>) unless @<%= field.name %>.nil?
+ end
+ <%- else -%>
+ # :call-seq:
+ # <%= field.name %> -> <%= field.call_seq_type %>
+ #
+ <%- if field.comment.nil? -%>
+ # Returns the `<%= field.name %>` attribute.
+ <%- else -%>
+ <%- field.each_comment_line do |line| -%>
+ #<%= line %>
+ <%- end -%>
+ <%- end -%>
+ #--
+ #: () -> <%= field.rbs_class %>
+ def <%= field.name %>
+ @<%= field.name %>
+ end
+
+ <%- end -%>
+ <%- end -%>
+ # :section: Slicing
+
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::LocationField -%>
+ <%- raise unless field.name.end_with?("_loc") -%>
+ <%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
+ # :call-seq:
+ # <%= field.name.delete_suffix("_loc") %> -> String
+ #
+ # Slice the location of <%= field.name %> from the source.
+ #--
+ #: () -> String
+ def <%= field.name.delete_suffix("_loc") %>
+ <%= field.name %>.slice
+ end
+
+ <%- when Prism::Template::OptionalLocationField -%>
+ <%- raise unless field.name.end_with?("_loc") -%>
+ <%- next if node.fields.any? { |other| other.name == field.name.delete_suffix("_loc") } -%>
+ # :call-seq:
+ # <%= field.name.delete_suffix("_loc") %> -> String | nil
+ #
+ # Slice the location of <%= field.name %> from the source.
+ #--
+ #: () -> String?
+ def <%= field.name.delete_suffix("_loc") %>
+ <%= field.name %>&.slice
+ end
+
+ <%- end -%>
+ <%- end -%>
+ # :section:
+
+ #: (untyped other) -> boolish
+ def ===(other) # :nodoc:
+ other.is_a?(<%= node.name %>)<%= " &&" if (fields = [*node.flags, *node.fields]).any? %>
+ <%- fields.each_with_index do |field, index| -%>
+ <%- if field.is_a?(Prism::Template::LocationField) || field.is_a?(Prism::Template::OptionalLocationField) -%>
+ (<%= field.name %>.nil? == other.<%= field.name %>.nil?)<%= " &&" if index != fields.length - 1 %>
+ <%- elsif field.is_a?(Prism::Template::NodeListField) || field.is_a?(Prism::Template::ConstantListField) -%>
+ (<%= field.name %>.length == other.<%= field.name %>.length) &&
+ <%= field.name %>.zip(other.<%= field.name %>).all? { |left, right| left === right }<%= " &&" if index != fields.length - 1 %>
+ <%- elsif field.is_a?(Prism::Template::Flags) -%>
+ (flags === other.flags)<%= " &&" if index != fields.length - 1 %>
+ <%- else -%>
+ (<%= field.name %> === other.<%= field.name %>)<%= " &&" if index != fields.length - 1 %>
+ <%- end -%>
+ <%- end -%>
+ end
+ end
+ <%- end -%>
+ <%- flags.each do |flag| -%>
+
+ # <%= flag.comment %>
+ module <%= flag.name %>
+ <%- flag.values.each_with_index do |value, index| -%>
+ # <%= value.comment %>
+ <%= value.name %> = 1 << <%= index + Prism::Template::COMMON_FLAGS_COUNT %>
+<%= "\n" if value != flag.values.last -%>
+ <%- end -%>
+ end
+ <%- end -%>
+
+ # The flags that are common to all nodes.
+ module NodeFlags
+ # A flag to indicate that the node is a candidate to emit a :line event
+ # through tracepoint when compiled.
+ NEWLINE = 1
+
+ # A flag to indicate that the value that the node represents is a value that
+ # can be determined at parse-time.
+ STATIC_LITERAL = 2
+ end
+end
diff --git a/prism/templates/lib/prism/reflection.rb.erb b/prism/templates/lib/prism/reflection.rb.erb
new file mode 100644
index 0000000000..0012f120b2
--- /dev/null
+++ b/prism/templates/lib/prism/reflection.rb.erb
@@ -0,0 +1,145 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # The Reflection module provides the ability to reflect on the structure of
+ # the syntax tree itself, as opposed to looking at a single syntax tree. This
+ # is useful in metaprogramming contexts.
+ module Reflection
+ # A field represents a single piece of data on a node. It is the base class
+ # for all other field types.
+ class Field
+ # The name of the field.
+ attr_reader :name #: Symbol
+
+ # Initializes the field with the given name.
+ #--
+ #: (Symbol name) -> void
+ def initialize(name)
+ @name = name
+ end
+ end
+
+ # A node field represents a single child node in the syntax tree. It
+ # resolves to a Prism::Node in Ruby.
+ class NodeField < Field
+ end
+
+ # An optional node field represents a single child node in the syntax tree
+ # that may or may not be present. It resolves to either a Prism::Node or nil
+ # in Ruby.
+ class OptionalNodeField < Field
+ end
+
+ # A node list field represents a list of child nodes in the syntax tree. It
+ # resolves to an array of Prism::Node instances in Ruby.
+ class NodeListField < Field
+ end
+
+ # A constant field represents a constant value on a node. Effectively, it
+ # represents an identifier found within the source. It resolves to a symbol
+ # in Ruby.
+ class ConstantField < Field
+ end
+
+ # An optional constant field represents a constant value on a node that may
+ # or may not be present. It resolves to either a symbol or nil in Ruby.
+ class OptionalConstantField < Field
+ end
+
+ # A constant list field represents a list of constant values on a node. It
+ # resolves to an array of symbols in Ruby.
+ class ConstantListField < Field
+ end
+
+ # A string field represents a string value on a node. It almost always
+ # represents the unescaped value of a string-like literal. It resolves to a
+ # string in Ruby.
+ class StringField < Field
+ end
+
+ # A location field represents the location of some part of the node in the
+ # source code. For example, the location of a keyword or an operator. It
+ # resolves to a Prism::Location in Ruby.
+ class LocationField < Field
+ end
+
+ # An optional location field represents the location of some part of the
+ # node in the source code that may or may not be present. It resolves to
+ # either a Prism::Location or nil in Ruby.
+ class OptionalLocationField < Field
+ end
+
+ # An integer field represents an integer value. It is used to represent the
+ # value of an integer literal, the depth of local variables, and the number
+ # of a numbered reference. It resolves to an Integer in Ruby.
+ class IntegerField < Field
+ end
+
+ # A float field represents a double-precision floating point value. It is
+ # used exclusively to represent the value of a floating point literal. It
+ # resolves to a Float in Ruby.
+ class FloatField < Field
+ end
+
+ # A flags field represents a bitset of flags on a node. It resolves to an
+ # integer in Ruby. Note that the flags cannot be accessed directly on the
+ # node because the integer is kept private. Instead, the various flags in
+ # the bitset should be accessed through their query methods.
+ class FlagsField < Field
+ # The names of the flags in the bitset.
+ attr_reader :flags #: Array[Symbol]
+
+ # Initializes the flags field with the given name and flags.
+ #--
+ #: (Symbol name, Array[Symbol] flags) -> void
+ def initialize(name, flags)
+ super(name)
+ @flags = flags
+ end
+ end
+
+ # Returns the fields for the given node.
+ #--
+ #: (singleton(Node) node) -> Array[Field]
+ def self.fields_for(node)
+ case node.type
+ <%- nodes.each do |node| -%>
+ when :<%= node.human %>
+ [<%= [*node.flags, *node.fields].map { |field|
+ case field
+ when Prism::Template::NodeField
+ "NodeField.new(:#{field.name})"
+ when Prism::Template::OptionalNodeField
+ "OptionalNodeField.new(:#{field.name})"
+ when Prism::Template::NodeListField
+ "NodeListField.new(:#{field.name})"
+ when Prism::Template::ConstantField
+ "ConstantField.new(:#{field.name})"
+ when Prism::Template::OptionalConstantField
+ "OptionalConstantField.new(:#{field.name})"
+ when Prism::Template::ConstantListField
+ "ConstantListField.new(:#{field.name})"
+ when Prism::Template::StringField
+ "StringField.new(:#{field.name})"
+ when Prism::Template::LocationField
+ "LocationField.new(:#{field.name})"
+ when Prism::Template::OptionalLocationField
+ "OptionalLocationField.new(:#{field.name})"
+ when Prism::Template::UInt8Field, Prism::Template::UInt32Field, Prism::Template::IntegerField
+ "IntegerField.new(:#{field.name})"
+ when Prism::Template::DoubleField
+ "FloatField.new(:#{field.name})"
+ when Prism::Template::Flags
+ "FlagsField.new(:flags, [#{field.values.map { |value| ":#{value.name.downcase}?" }.join(", ")}])"
+ else
+ raise field.class.name
+ end
+ }.join(", ") %>]
+ <%- end -%>
+ else
+ raise "Unknown node type: #{node.type.inspect}"
+ end
+ end
+ end
+end
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
new file mode 100644
index 0000000000..a676f957af
--- /dev/null
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -0,0 +1,702 @@
+#--
+# rbs_inline: enabled
+
+require "stringio"
+require_relative "polyfill/unpack1"
+
+module Prism
+ # A module responsible for deserializing parse results.
+ module Serialize # :nodoc:
+ # The major version of prism that we are expecting to find in the serialized
+ # strings.
+ MAJOR_VERSION = 1
+
+ # The minor version of prism that we are expecting to find in the serialized
+ # strings.
+ MINOR_VERSION = 9
+
+ # The patch version of prism that we are expecting to find in the serialized
+ # strings.
+ PATCH_VERSION = 0
+
+ # Deserialize the dumped output from a request to parse or parse_file.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> ParseResult
+ def self.load_parse(input, serialized, freeze)
+ input = input.dup
+ source = Source.for(input, 1, [])
+ loader = Loader.new(source, serialized)
+
+ loader.load_header
+ encoding = loader.load_encoding
+ start_line = loader.load_varsint
+ offsets = loader.load_line_offsets(freeze)
+
+ source.replace_start_line(start_line)
+ source.replace_offsets(offsets)
+
+ comments = loader.load_comments(freeze)
+ magic_comments = loader.load_magic_comments(freeze)
+ data_loc = loader.load_optional_location_object(freeze)
+ errors = loader.load_errors(encoding, freeze)
+ warnings = loader.load_warnings(encoding, freeze)
+ continuable = loader.load_bool
+ cpool_base = loader.load_uint32
+ cpool_size = loader.load_varuint
+
+ constant_pool = ConstantPool.new(serialized, cpool_base, cpool_size)
+
+ node = loader.load_node(constant_pool, encoding, freeze) #: ProgramNode
+ loader.load_constant_pool(constant_pool)
+ raise unless loader.eof?
+
+ result = ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ result.freeze if freeze
+
+ input.force_encoding(encoding)
+
+ # This is an extremely niche use-case where the file was marked as binary
+ # but it contained UTF-8-encoded characters. In that case we will actually
+ # put it back to UTF-8 to give the location APIs the best chance of being
+ # correct.
+ if !input.ascii_only? && input.encoding == Encoding::BINARY
+ input.force_encoding(Encoding::UTF_8)
+ input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
+ end
+
+ if freeze
+ input.freeze
+ source.deep_freeze
+ end
+
+ result
+ end
+
+ # Deserialize the dumped output from a request to lex or lex_file.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> LexResult
+ def self.load_lex(input, serialized, freeze)
+ source = Source.for(input, 1, [])
+ loader = Loader.new(source, serialized)
+
+ tokens = loader.load_tokens
+ encoding = loader.load_encoding
+ start_line = loader.load_varsint
+ offsets = loader.load_line_offsets(freeze)
+
+ source.replace_start_line(start_line)
+ source.replace_offsets(offsets)
+
+ comments = loader.load_comments(freeze)
+ magic_comments = loader.load_magic_comments(freeze)
+ data_loc = loader.load_optional_location_object(freeze)
+ errors = loader.load_errors(encoding, freeze)
+ warnings = loader.load_warnings(encoding, freeze)
+ continuable = loader.load_bool
+ raise unless loader.eof?
+
+ result = LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+
+ tokens.each do |token|
+ token[0].value.force_encoding(encoding)
+
+ if freeze
+ token[0].deep_freeze
+ token.freeze
+ end
+ end
+
+ if freeze
+ source.deep_freeze
+ tokens.freeze
+ result.freeze
+ end
+
+ result
+ end
+
+ # Deserialize the dumped output from a request to parse_comments or
+ # parse_file_comments.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> Array[Comment]
+ def self.load_parse_comments(input, serialized, freeze)
+ source = Source.for(input, 1, [])
+ loader = Loader.new(source, serialized)
+
+ loader.load_header
+ loader.load_encoding
+ start_line = loader.load_varsint
+
+ source.replace_start_line(start_line)
+
+ result = loader.load_comments(freeze)
+ raise unless loader.eof?
+
+ source.deep_freeze if freeze
+ result
+ end
+
+ # Deserialize the dumped output from a request to parse_lex or
+ # parse_lex_file.
+ #
+ # The formatting of the source of this method is purposeful to illustrate
+ # the structure of the serialized data.
+ #--
+ #: (String input, String serialized, bool freeze) -> ParseLexResult
+ def self.load_parse_lex(input, serialized, freeze)
+ source = Source.for(input, 1, [])
+ loader = Loader.new(source, serialized)
+
+ tokens = loader.load_tokens
+ loader.load_header
+ encoding = loader.load_encoding
+ start_line = loader.load_varsint
+ offsets = loader.load_line_offsets(freeze)
+
+ source.replace_start_line(start_line)
+ source.replace_offsets(offsets)
+
+ comments = loader.load_comments(freeze)
+ magic_comments = loader.load_magic_comments(freeze)
+ data_loc = loader.load_optional_location_object(freeze)
+ errors = loader.load_errors(encoding, freeze)
+ warnings = loader.load_warnings(encoding, freeze)
+ continuable = loader.load_bool
+ cpool_base = loader.load_uint32
+ cpool_size = loader.load_varuint
+
+ constant_pool = ConstantPool.new(serialized, cpool_base, cpool_size)
+
+ node = loader.load_node(constant_pool, encoding, freeze) #: ProgramNode
+ loader.load_constant_pool(constant_pool)
+ raise unless loader.eof?
+
+ value = [node, tokens] #: [ProgramNode, Array[[Token, Integer]]]
+ result = ParseLexResult.new(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+
+ tokens.each do |token|
+ token[0].value.force_encoding(encoding)
+
+ if freeze
+ token[0].deep_freeze
+ token.freeze
+ end
+ end
+
+ if freeze
+ source.deep_freeze
+ tokens.freeze
+ value.freeze
+ result.freeze
+ end
+
+ result
+ end
+
+ class ConstantPool # :nodoc:
+ attr_reader :size #: Integer
+
+ # @rbs @serialized: String
+ # @rbs @base: Integer
+ # @rbs @pool: Array[Symbol?]
+
+ #: (String serialized, Integer base, Integer size) -> void
+ def initialize(serialized, base, size)
+ @serialized = serialized
+ @base = base
+ @size = size
+ @pool = Array.new(size, nil)
+ end
+
+ #: (Integer index, Encoding encoding) -> Symbol
+ def get(index, encoding)
+ @pool[index] ||=
+ begin
+ offset = @base + index * 8
+ start = @serialized.unpack1("L", offset: offset) #: Integer
+ length = @serialized.unpack1("L", offset: offset + 4) #: Integer
+
+ (@serialized.byteslice(start, length) or raise).force_encoding(encoding).to_sym
+ end
+ end
+ end
+
+ if RUBY_ENGINE == "truffleruby"
+ # StringIO is synchronized and that adds a high overhead on TruffleRuby.
+ # @rbs skip
+ class FastStringIO # :nodoc:
+ attr_accessor :pos
+
+ def initialize(string)
+ @string = string
+ @pos = 0
+ end
+
+ def getbyte
+ byte = @string.getbyte(@pos)
+ @pos += 1
+ byte
+ end
+
+ def read(n)
+ slice = @string.byteslice(@pos, n)
+ @pos += n
+ slice
+ end
+
+ def eof?
+ @pos >= @string.bytesize
+ end
+ end
+ else
+ FastStringIO = ::StringIO # :nodoc:
+ end
+
+ class Loader # :nodoc:
+ attr_reader :input #: String
+ attr_reader :io #: StringIO
+ attr_reader :source #: Source
+
+ #: (Source source, String serialized) -> void
+ def initialize(source, serialized)
+ @input = source.source.dup
+ raise unless serialized.encoding == Encoding::BINARY
+ @io = FastStringIO.new(serialized)
+ @source = source
+ define_load_node_lambdas if RUBY_ENGINE != "ruby"
+ end
+
+ #: () -> bool
+ def eof?
+ io.getbyte
+ io.eof?
+ end
+
+ #: (ConstantPool constant_pool) -> void
+ def load_constant_pool(constant_pool)
+ trailer = 0
+
+ constant_pool.size.times do |index|
+ length = (io.read(8) or raise).unpack1("L", offset: 4) #: Integer
+ trailer += length
+ end
+
+ io.read(trailer)
+ end
+
+ #: () -> void
+ def load_header
+ raise "Invalid serialization" if io.read(5) != "PRISM"
+ raise "Invalid serialization" if (io.read(3) or raise).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
+ raise "Invalid serialization (location fields must be included but are not)" if io.getbyte != 0
+ end
+
+ #: () -> Encoding
+ def load_encoding
+ encoding = Encoding.find((io.read(load_varuint) or raise)) or raise
+ @input = input.force_encoding(encoding).freeze
+ encoding
+ end
+
+ #: (bool freeze) -> Array[Integer]
+ def load_line_offsets(freeze)
+ offsets = Array.new(load_varuint) { load_varuint }
+ offsets.freeze if freeze
+ offsets
+ end
+
+ #: (bool freeze) -> Array[Comment]
+ def load_comments(freeze)
+ comments =
+ Array.new(load_varuint) do
+ comment =
+ case load_varuint
+ when 0 then InlineComment.new(load_location_object(freeze))
+ when 1 then EmbDocComment.new(load_location_object(freeze))
+ else raise
+ end
+
+ comment.freeze if freeze
+ comment
+ end
+
+ comments.freeze if freeze
+ comments
+ end
+
+ #: (bool freeze) -> Array[MagicComment]
+ def load_magic_comments(freeze)
+ magic_comments =
+ Array.new(load_varuint) do
+ magic_comment =
+ MagicComment.new(
+ load_location_object(freeze),
+ load_location_object(freeze)
+ )
+
+ magic_comment.freeze if freeze
+ magic_comment
+ end
+
+ magic_comments.freeze if freeze
+ magic_comments
+ end
+
+ DIAGNOSTIC_TYPES = [
+ <%- errors.each do |error| -%>
+ <%= error.name.downcase.to_sym.inspect %>,
+ <%- end -%>
+ <%- warnings.each do |warning| -%>
+ <%= warning.name.downcase.to_sym.inspect %>,
+ <%- end -%>
+ ].freeze #: Array[Symbol]
+
+ private_constant :DIAGNOSTIC_TYPES
+
+ #: () -> Symbol
+ def load_error_level
+ level = io.getbyte
+
+ case level
+ when 0
+ :syntax
+ when 1
+ :argument
+ when 2
+ :load
+ else
+ raise "Unknown level: #{level}"
+ end
+ end
+
+ #: (Encoding encoding, bool freeze) -> Array[ParseError]
+ def load_errors(encoding, freeze)
+ errors =
+ Array.new(load_varuint) do
+ error =
+ ParseError.new(
+ DIAGNOSTIC_TYPES.fetch(load_varuint),
+ load_string(encoding),
+ load_location_object(freeze),
+ load_error_level
+ )
+
+ error.freeze if freeze
+ error
+ end
+
+ errors.freeze if freeze
+ errors
+ end
+
+ #: () -> Symbol
+ def load_warning_level
+ level = io.getbyte
+
+ case level
+ when 0
+ :default
+ when 1
+ :verbose
+ else
+ raise "Unknown level: #{level}"
+ end
+ end
+
+ #: (Encoding encoding, bool freeze) -> Array[ParseWarning]
+ def load_warnings(encoding, freeze)
+ warnings =
+ Array.new(load_varuint) do
+ warning =
+ ParseWarning.new(
+ DIAGNOSTIC_TYPES.fetch(load_varuint),
+ load_string(encoding),
+ load_location_object(freeze),
+ load_warning_level
+ )
+
+ warning.freeze if freeze
+ warning
+ end
+
+ warnings.freeze if freeze
+ warnings
+ end
+
+ #: () -> Array[[Token, Integer]]
+ def load_tokens
+ tokens = [] #: Array[[Token, Integer]]
+
+ while (type = TOKEN_TYPES.fetch(load_varuint))
+ location = load_location_object(false)
+
+ lex_state = load_varuint
+
+ token = Token.new(@source, type, location.slice, location)
+
+ tokens << [token, lex_state]
+ end
+
+ tokens
+ end
+
+ # variable-length integer using https://en.wikipedia.org/wiki/LEB128
+ # This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
+ #--
+ #: () -> Integer
+ def load_varuint
+ n = (io.getbyte or raise)
+ if n < 128
+ n
+ else
+ n -= 128
+ shift = 0
+ while (b = (io.getbyte or raise)) >= 128
+ n += (b - 128) << (shift += 7)
+ end
+ n + (b << (shift + 7))
+ end
+ end
+
+ #: () -> Integer
+ def load_varsint
+ n = load_varuint
+ (n >> 1) ^ (-(n & 1))
+ end
+
+ #: () -> Integer
+ def load_integer
+ negative = io.getbyte != 0
+ length = load_varuint
+
+ value = 0
+ length.times { |index| value |= (load_varuint << (index * 32)) }
+
+ value = -value if negative
+ value
+ end
+
+ #: () -> Float
+ def load_double
+ (io.read(8) or raise).unpack1("D") #: Float
+ end
+
+ #: () -> bool
+ def load_bool
+ (io.getbyte or raise) != 0
+ end
+
+ #: () -> Integer
+ def load_uint32
+ (io.read(4) or raise).unpack1("L") #: Integer
+ end
+
+ #: (ConstantPool constant_pool, Encoding encoding, bool freeze) -> node?
+ def load_optional_node(constant_pool, encoding, freeze)
+ if io.getbyte != 0
+ io.pos -= 1
+ load_node(constant_pool, encoding, freeze)
+ end
+ end
+
+ #: (Encoding encoding) -> String
+ def load_string(encoding)
+ (io.read(load_varuint) or raise).force_encoding(encoding).freeze
+ end
+
+ #: (bool freeze) -> Location
+ def load_location_object(freeze)
+ location = Location.new(source, load_varuint, load_varuint)
+ location.freeze if freeze
+ location
+ end
+
+ # Load a location object from the serialized data. Note that we are lying
+ # about the signature a bit here, because we sometimes load it as a packed
+ # integer instead of an object.
+ #--
+ #: (bool freeze) -> Location
+ def load_location(freeze)
+ return load_location_object(freeze) if freeze
+ (load_varuint << 32) | load_varuint #: Location
+ end
+
+ # Load an optional location object from the serialized data if it is
+ # present. Note that we are lying about the signature a bit here, because
+ # we sometimes load it as a packed integer instead of an object.
+ #--
+ #: (bool freeze) -> Location?
+ def load_optional_location(freeze)
+ load_location(freeze) if io.getbyte != 0
+ end
+
+ #: (bool freeze) -> Location?
+ def load_optional_location_object(freeze)
+ load_location_object(freeze) if io.getbyte != 0
+ end
+
+ #: (ConstantPool constant_pool, Encoding encoding) -> Symbol
+ def load_constant(constant_pool, encoding)
+ index = load_varuint
+ constant_pool.get(index - 1, encoding)
+ end
+
+ #: (ConstantPool constant_pool, Encoding encoding) -> Symbol?
+ def load_optional_constant(constant_pool, encoding)
+ index = load_varuint
+ constant_pool.get(index - 1, encoding) if index != 0
+ end
+
+ if RUBY_ENGINE == "ruby"
+ #: (ConstantPool constant_pool, Encoding encoding, bool freeze) -> node
+ def load_node(constant_pool, encoding, freeze)
+ type = io.getbyte
+ node_id = load_varuint
+ location = load_location(freeze) #: Location
+ value =
+ case type
+ <%- nodes.each_with_index do |node, index| -%>
+ when <%= index + 1 %>
+ <%- if node.needs_serialized_length? -%>
+ load_uint32
+ <%- end -%>
+ <%= node.name %>.new(
+ source,
+ node_id,
+ location,
+ load_varuint,
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ load_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ load_optional_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::StringField -%>
+ load_string(encoding),
+ <%- when Prism::Template::NodeListField -%>
+ Array.new(load_varuint) do
+ load_node(constant_pool, encoding, freeze) #: <%= field.element_rbs_class %>
+ end.tap { |nodes| nodes.freeze if freeze },
+ <%- when Prism::Template::ConstantField -%>
+ load_constant(constant_pool, encoding),
+ <%- when Prism::Template::OptionalConstantField -%>
+ load_optional_constant(constant_pool, encoding),
+ <%- when Prism::Template::ConstantListField -%>
+ Array.new(load_varuint) { load_constant(constant_pool, encoding) }.tap { |constants| constants.freeze if freeze },
+ <%- when Prism::Template::LocationField -%>
+ load_location(freeze),
+ <%- when Prism::Template::OptionalLocationField -%>
+ load_optional_location(freeze),
+ <%- when Prism::Template::UInt8Field -%>
+ (io.getbyte or raise),
+ <%- when Prism::Template::UInt32Field -%>
+ load_varuint,
+ <%- when Prism::Template::IntegerField -%>
+ load_integer,
+ <%- when Prism::Template::DoubleField -%>
+ load_double,
+ <%- else raise -%>
+ <%- end -%>
+ <%- end -%>
+ )
+ <%- end -%>
+ else
+ raise "Unknown node type: #{type}"
+ end
+
+ value.freeze if freeze
+ value
+ end
+ else
+ # @rbs skip
+ def load_node(constant_pool, encoding, freeze)
+ @load_node_lambdas[(io.getbyte or raise)].call(constant_pool, encoding, freeze)
+ end
+
+ # @rbs skip
+ def define_load_node_lambdas
+ @load_node_lambdas = [
+ nil,
+ <%- nodes.each do |node| -%>
+ -> (constant_pool, encoding, freeze) {
+ node_id = load_varuint
+ location = load_location(freeze)
+ <%- if node.needs_serialized_length? -%>
+ load_uint32
+ <%- end -%>
+ value =
+ <%= node.name %>.new(
+ source,
+ node_id,
+ location,
+ load_varuint,
+ <%- node.fields.map do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ load_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::OptionalNodeField -%>
+ load_optional_node(constant_pool, encoding, freeze), #: <%= field.rbs_class %>
+ <%- when Prism::Template::StringField -%>
+ load_string(encoding),
+ <%- when Prism::Template::NodeListField -%>
+ Array.new(load_varuint) do
+ load_node(constant_pool, encoding, freeze) #: <%= field.element_rbs_class %>
+ end,
+ <%- when Prism::Template::ConstantField -%>
+ load_constant(constant_pool, encoding),
+ <%- when Prism::Template::OptionalConstantField -%>
+ load_optional_constant(constant_pool, encoding),
+ <%- when Prism::Template::ConstantListField -%>
+ Array.new(load_varuint) { load_constant(constant_pool, encoding) },
+ <%- when Prism::Template::LocationField -%>
+ load_location(freeze),
+ <%- when Prism::Template::OptionalLocationField -%>
+ load_optional_location(freeze),
+ <%- when Prism::Template::UInt8Field -%>
+ (io.getbyte or raise),
+ <%- when Prism::Template::UInt32Field -%>
+ load_varuint,
+ <%- when Prism::Template::IntegerField -%>
+ load_integer,
+ <%- when Prism::Template::DoubleField -%>
+ load_double,
+ <%- else raise -%>
+ <%- end -%>
+ <%- end -%>
+ )
+ value.freeze if freeze
+ value
+ },
+ <%- end -%>
+ ]
+ end
+ end
+
+ # @rbs!
+ # @load_node_lambdas: Array[Proc]
+ # def define_load_node_lambdas: () -> void
+ end
+
+ # The token types that can be indexed by their enum values.
+ TOKEN_TYPES = [
+ nil,
+ <%- tokens.each do |token| -%>
+ <%= token.name.to_sym.inspect %>,
+ <%- end -%>
+ ].freeze #: Array[Symbol?]
+
+ private_constant :MAJOR_VERSION, :MINOR_VERSION, :PATCH_VERSION
+ private_constant :ConstantPool, :FastStringIO, :Loader, :TOKEN_TYPES
+ end
+
+ private_constant :Serialize
+end
diff --git a/prism/templates/lib/prism/visitor.rb.erb b/prism/templates/lib/prism/visitor.rb.erb
new file mode 100644
index 0000000000..f23e87d99e
--- /dev/null
+++ b/prism/templates/lib/prism/visitor.rb.erb
@@ -0,0 +1,73 @@
+#--
+# rbs_inline: enabled
+
+module Prism
+ # @rbs!
+ # interface _Visitor
+ # <% nodes.each do |node| %>
+ # def visit_<%= node.human %>: (<%= node.name %>) -> void
+ # <% end %>
+ # end
+
+ # A class that knows how to walk down the tree. None of the individual visit
+ # methods are implemented on this visitor, so it forces the consumer to
+ # implement each one that they need. For a default implementation that
+ # continues walking the tree, see the Visitor class.
+ class BasicVisitor
+ # Calls `accept` on the given node if it is not `nil`, which in turn should
+ # call back into this visitor by calling the appropriate `visit_*` method.
+ #--
+ #: (node? node) -> void
+ def visit(node)
+ # @type self: _Visitor
+ node&.accept(self)
+ end
+
+ # Visits each node in `nodes` by calling `accept` on each one.
+ #--
+ #: (Array[node?] nodes) -> void
+ def visit_all(nodes)
+ # @type self: _Visitor
+ nodes.each { |node| node&.accept(self) }
+ end
+
+ # Visits the child nodes of `node` by calling `accept` on each one.
+ #--
+ #: (node node) -> void
+ def visit_child_nodes(node)
+ # @type self: _Visitor
+ node.each_child_node { |node| node.accept(self) }
+ end
+ end
+
+ # A visitor is a class that provides a default implementation for every accept
+ # method defined on the nodes. This means it can walk a tree without the
+ # caller needing to define any special handling. This allows you to handle a
+ # subset of the tree, while still walking the whole tree.
+ #
+ # For example, to find all of the method calls that call the `foo` method, you
+ # could write:
+ #
+ # class FooCalls < Prism::Visitor
+ # def visit_call_node(node)
+ # if node.name == :foo
+ # # Do something with the node
+ # end
+ #
+ # # Call super so that the visitor continues walking the tree
+ # super
+ # end
+ # end
+ #
+ class Visitor < BasicVisitor
+ <%- nodes.each_with_index do |node, index| -%>
+<%= "\n" if index != 0 -%>
+ # Visit a <%= node.name %> node
+ #--
+ #: (<%= node.name %> node) -> void
+ def visit_<%= node.human %>(node)
+ node.each_child_node { |node| node.accept(self) }
+ end
+ <%- end -%>
+ end
+end
diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb
new file mode 100644
index 0000000000..0dea732869
--- /dev/null
+++ b/prism/templates/src/diagnostic.c.erb
@@ -0,0 +1,554 @@
+#include "prism/internal/diagnostic.h"
+
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/allocator.h"
+#include "prism/internal/arena.h"
+#include "prism/internal/list.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define PM_DIAGNOSTIC_ID_MAX <%= errors.length + warnings.length %>
+
+/** This struct holds the data for each diagnostic. */
+typedef struct {
+ /** The message associated with the diagnostic. */
+ const char* message;
+
+ /** The level associated with the diagnostic. */
+ uint8_t level;
+} pm_diagnostic_data_t;
+
+/**
+ * ## Message composition
+ *
+ * When composing an error message, use sentence fragments.
+ *
+ * Try describing the property of the code that caused the error, rather than
+ * the rule that is being violated. It may help to use a fragment that completes
+ * a sentence beginning, "the parser encountered (a) ...". If appropriate, add a
+ * description of the rule violation (or other helpful context) after a
+ * semicolon.
+ *
+ * For example:, instead of "control escape sequence cannot be doubled", prefer:
+ *
+ * > "invalid control escape sequence; control cannot be repeated"
+ *
+ * In some cases, where the failure is more general or syntax expectations are
+ * violated, it may make more sense to use a fragment that completes a sentence
+ * beginning, "the parser ...".
+ *
+ * For example:
+ *
+ * > "expected an expression after `(`"
+ * > "cannot parse the expression"
+ *
+ * ## Message style guide
+ *
+ * - Use articles like "a", "an", and "the" when appropriate.
+ * - e.g., prefer "cannot parse the expression" to "cannot parse expression".
+ * - Use the common name for tokens and nodes.
+ * - e.g., prefer "keyword splat" to "assoc splat"
+ * - e.g., prefer "embedded document" to "embdoc"
+ * - Do not capitalize the initial word of the message.
+ * - Use back ticks around token literals
+ * - e.g., "Expected a `=>` between the hash key and value"
+ * - Do not use `.` or other punctuation at the end of the message.
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
+ * - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
+ *
+ * ## Error names (PM_ERR_*)
+ *
+ * - When appropriate, prefer node name to token name.
+ * - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
+ * - Prefer token name to common name.
+ * - e.g., prefer "STAR" to "ASTERISK".
+ * - Try to order the words in the name from more general to more specific,
+ * - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
+ * - When in doubt, look for similar patterns and name them so that they are grouped when lexically
+ * sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
+ *
+ * ## Level
+ *
+ * For errors, they are:
+ *
+ * * `PM_ERROR_LEVEL_SYNTAX` - Errors that should raise SyntaxError.
+ * * `PM_ERROR_LEVEL_ARGUMENT` - Errors that should raise ArgumentError.
+ * * `PM_ERROR_LEVEL_LOAD` - Errors that should raise LoadError.
+ *
+ * For warnings, they are:
+ *
+ * * `PM_WARNING_LEVEL_DEFAULT` - Warnings that appear for `ruby -c -e 'code'`.
+ * * `PM_WARNING_LEVEL_VERBOSE` - Warnings that appear with `-w`, as in `ruby -w -c -e 'code'`.
+ */
+static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
+ /* Special error that can be replaced */
+ [PM_ERR_CANNOT_PARSE_EXPRESSION] = { "cannot parse the expression", PM_ERROR_LEVEL_SYNTAX },
+
+ /* Errors that should raise argument errors */
+ [PM_ERR_INVALID_ENCODING_MAGIC_COMMENT] = { "unknown or invalid encoding in the magic comment", PM_ERROR_LEVEL_ARGUMENT },
+
+ /* Errors that should raise load errors */
+ [PM_ERR_SCRIPT_NOT_FOUND] = { "no Ruby script found in input", PM_ERROR_LEVEL_LOAD },
+
+ /* Errors that should raise syntax errors */
+ [PM_ERR_ALIAS_ARGUMENT] = { "invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE] = { "invalid argument being passed to `alias`; can't make alias for the number variables", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = { "unexpected `&&=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_AFTER_BLOCK] = { "unexpected argument after a block argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES] = { "unexpected argument after `...`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_BARE_HASH] = { "unexpected bare hash argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_BLOCK_MULTI] = { "both block arg and actual block given; only one block is allowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_CONFLICT_AMPERSAND] = { "unexpected `&`; anonymous block parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_CONFLICT_STAR] = { "unexpected `*`; anonymous rest parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_CONFLICT_STAR_STAR] = { "unexpected `**`; anonymous keyword rest parameter is also used within block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_CLASS] = { "invalid formal argument; formal argument cannot be a class variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_CONSTANT] = { "invalid formal argument; formal argument cannot be a constant", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_GLOBAL] = { "invalid formal argument; formal argument cannot be a global variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORMAL_IVAR] = { "invalid formal argument; formal argument cannot be an instance variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_FORWARDING_UNBOUND] = { "unexpected `...` in an non-parenthesized call", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND] = { "unexpected `&`; no anonymous block parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES] = { "unexpected ... when the parent method is not forwarding", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA] = { "unexpected ... in lambda argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK] = { "unexpected ... in block argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_STAR] = { "unexpected `*`; no anonymous rest parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR] = { "unexpected `**`; no anonymous keyword rest parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT] = { "unexpected `*` splat argument after a `**` keyword splat argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT] = { "unexpected `*` splat argument after a `*` splat argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_TERM_PAREN] = { "unexpected %s; expected a `)` to close the arguments", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARGUMENT_UNEXPECTED_BLOCK] = { "unexpected '{' after a method call without parenthesis", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_ELEMENT] = { "expected an element for the array", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_EXPRESSION] = { "expected an expression for the array element", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_EXPRESSION_AFTER_STAR] = { "expected an expression after `*` in the array", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_SEPARATOR] = { "unexpected %s; expected a `,` separator for the array elements", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ARRAY_TERM] = { "unexpected %s; expected a `]` to close the array", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_LONELY_ELSE] = { "unexpected `else` in `begin` block; else without rescue is useless", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_TERM] = { "expected an `end` to close the `begin` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_UPCASE_BRACE] = { "expected a `{` after `BEGIN`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_UPCASE_TERM] = { "expected a `}` to close the `BEGIN` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BEGIN_UPCASE_TOPLEVEL] = { "BEGIN is permitted only at toplevel", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE] = { "expected a local variable name in the block parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_PARAM_PIPE_TERM] = { "expected the block parameters to end with `|`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_TERM_BRACE] = { "expected a block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_BLOCK_TERM_END] = { "expected a block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CANNOT_PARSE_STRING_PART] = { "cannot parse the string part", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_EXPRESSION_AFTER_CASE] = { "expected an expression after `case`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_EXPRESSION_AFTER_WHEN] = { "expected an expression after `when`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_MATCH_MISSING_PREDICATE] = { "expected a predicate for a case matching statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_MISSING_CONDITIONS] = { "expected a `when` or `in` clause after `case`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CASE_TERM] = { "expected an `end` to close the `case` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_IN_METHOD] = { "unexpected class definition in method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_NAME] = { "unexpected constant path after `class`; class/module name must be CONSTANT", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_SUPERCLASS] = { "expected a superclass after `<`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_TERM] = { "expected an `end` to close the `class` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_UNEXPECTED_END] = { "unexpected `end`, expecting ';' or '\\n'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CLASS_VARIABLE_BARE] = { "'@@' without identifiers is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_ELSIF_PREDICATE] = { "expected a predicate expression for the `elsif` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_IF_PREDICATE] = { "expected a predicate expression for the `if` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_PREDICATE_TERM] = { "expected `then` or `;` or '\\n'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_TERM] = { "expected an `end` to close the conditional clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_TERM_ELSE] = { "expected an `end` to close the `else` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_UNLESS_PREDICATE] = { "expected a predicate expression for the `unless` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_UNTIL_PREDICATE] = { "expected a predicate expression for the `until` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONDITIONAL_WHILE_PREDICATE] = { "expected a predicate expression for the `while` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT] = { "expected a constant after the `::` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS] = { "could not parse the endless method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_PARAMETERS] = { "could not parse the endless method parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_SETTER] = { "invalid method name; a setter method cannot be defined in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_ENDLESS_DO_BLOCK] = { "unexpected `do` for block in an endless method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_NAME] = { "unexpected %s; expected a method name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_PARAMS_TERM] = { "expected a delimiter to close the parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_PARAMS_TERM_PAREN] = { "unexpected %s; expected a `)` to close the parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_RECEIVER] = { "expected a receiver for the method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_RECEIVER_TERM] = { "expected a `.` or `::` after the receiver in a method definition", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEF_TERM] = { "expected an `end` to close the `def` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_DEFINED_EXPRESSION] = { "expected an expression after `defined?`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EMBDOC_TERM] = { "embedded document meets end of file", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EMBEXPR_END] = { "expected a `}` to close the embedded expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EMBVAR_INVALID] = { "invalid embedded variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_END_UPCASE_BRACE] = { "expected a `{` after `END`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_END_UPCASE_TERM] = { "expected a `}` to close the `END` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_CONTROL] = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT] = { "invalid control escape sequence; control cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_HEXADECIMAL] = { "invalid hex escape sequence", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_META] = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_META_REPEAT] = { "invalid meta escape sequence; meta cannot be repeated", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE] = { "invalid Unicode escape sequence", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_CM_FLAGS] = { "invalid Unicode escape sequence; Unicode cannot be combined with control or meta flags", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LIST] = { "invalid Unicode list: %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL] = { "invalid Unicode escape sequence; Multiple codepoints at single character literal are disallowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_LONG] = { "invalid Unicode escape sequence; maximum length is 6 digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_SHORT] = { "too short escape sequence: %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_ESCAPE_INVALID_UNICODE_TERM] = { "unterminated Unicode escape", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_ARGUMENT] = { "unexpected %s; expected an argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EOL_AFTER_STATEMENT] = { "unexpected %s, expecting end-of-input", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ] = { "expected an expression after `&&=`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ] = { "expected an expression after `||=`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA] = { "expected an expression after `,`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL] = { "expected an expression after `=`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS] = { "expected an expression after `<<`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_LPAREN] = { "expected an expression after `(`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR] = { "unexpected %s; expected an expression after the operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT] = { "expected an expression after `*` splat in an argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH] = { "expected an expression after `**` in a hash", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_EXPRESSION_AFTER_STAR] = { "expected an expression after `*`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_FOR_DELIMITER] = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_IN_DELIMITER] = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN] = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_MESSAGE] = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RBRACKET] = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RPAREN] = { "expected a matching `)`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RPAREN_AFTER_MULTI] = { "expected a `)` after multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_RPAREN_REQ_PARAMETER] = { "expected a `)` to end a required parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER] = { "unexpected %s; expected a newline or a ';' after the singleton class", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_STRING_CONTENT] = { "expected string content after opening string delimiter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPECT_WHEN_DELIMITER] = { "expected a delimiter after the predicates of a `when` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_BARE_HASH] = { "unexpected bare hash in expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE] = { "unexpected '='; target cannot be written", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING] = { "Can't assign to __ENCODING__", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE] = { "Can't assign to false", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_FILE] = { "Can't assign to __FILE__", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_LINE] = { "Can't assign to __LINE__", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_NIL] = { "Can't assign to nil", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED] = { "Can't assign to numbered parameter %.2s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_SELF] = { "Can't change the value of self", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE] = { "Can't assign to true", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FLOAT_PARSE] = { "could not parse the float '%.*s'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_COLLECTION] = { "expected a collection after the `in` in a `for` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_INDEX] = { "expected an index after `for`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_IN] = { "expected an `in` after the index in a `for` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_FOR_TERM] = { "expected an `end` to close the `for` loop", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_GLOBAL_VARIABLE_BARE] = { "'$' without identifiers is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_EXPRESSION_AFTER_LABEL] = { "expected an expression after the label in a hash", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_KEY] = { "unexpected %s, expecting '}' or a key in the hash literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_ROCKET] = { "expected a `=>` between the hash key and value", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_TERM] = { "expected a `}` to close the hash literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HASH_VALUE] = { "unexpected %s; expected a value in the hash literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HEREDOC_IDENTIFIER] = { "unterminated here document identifier", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_HEREDOC_TERM] = { "unterminated heredoc; can't find string \"%.*s\" anywhere before EOF", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_QUESTION_MARK] = { "incomplete expression at `?`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3] = { "`%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_CLASS] = { "'%.*s' is not allowed as a class variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3] = { "`%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INCOMPLETE_VARIABLE_INSTANCE] = { "'%.*s' is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INSTANCE_VARIABLE_BARE] = { "'@' without identifiers is not allowed as an instance variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_BLOCK_EXIT] = { "Invalid %s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_COMMA] = { "invalid comma", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_ESCAPE_CHARACTER] = { "Invalid escape character syntax", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_FLOAT_EXPONENT] = { "invalid exponent", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_LOCAL_VARIABLE_READ] = { "identifier %.*s is not valid to get", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_LOCAL_VARIABLE_WRITE] = { "identifier %.*s is not valid to set", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_BINARY] = { "invalid binary number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_DECIMAL] = { "invalid decimal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_FRACTION] = { "unexpected fraction part after numeric literal", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_HEXADECIMAL] = { "invalid hexadecimal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_OCTAL] = { "invalid octal number; numeric literal without digits", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER] = { "invalid underscore placement in number", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING] = { "trailing '_' in number", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_CHARACTER] = { "Invalid char '\\x%02X' in expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_MULTIBYTE_CHAR] = { "invalid multibyte char (%s)", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_MULTIBYTE_CHARACTER] = { "invalid multibyte character 0x%X", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_MULTIBYTE_ESCAPE] = { "invalid multibyte escape: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_PRINTABLE_CHARACTER] = { "invalid character `%c`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_PERCENT] = { "unknown type of %string", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_PERCENT_EOF] = { "unterminated quoted string meets end of file", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_RETRY_AFTER_ELSE] = { "Invalid retry after else", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_RETRY_AFTER_ENSURE] = { "Invalid retry after ensure", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_RETRY_WITHOUT_RESCUE] = { "Invalid retry without rescue", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_SYMBOL] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_VARIABLE_GLOBAL_3_3] = { "`%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_VARIABLE_GLOBAL] = { "'%.*s' is not allowed as a global variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_INVALID_YIELD] = { "Invalid yield", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_IT_NOT_ALLOWED_NUMBERED] = { "'it' is not allowed when a numbered parameter is already used", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_IT_NOT_ALLOWED_ORDINARY] = { "'it' is not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LAMBDA_OPEN] = { "expected a `do` keyword or a `{` to open the lambda block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LAMBDA_TERM_BRACE] = { "expected a lambda block beginning with `{` to end with `}`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LAMBDA_TERM_END] = { "expected a lambda block beginning with `do` to end with `end`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_LOWER_ELEMENT] = { "expected a symbol in a `%i` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_LOWER_TERM] = { "unterminated list; expected a closing delimiter for the `%i`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_UPPER_ELEMENT] = { "expected a symbol in a `%I` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_I_UPPER_TERM] = { "unterminated list; expected a closing delimiter for the `%I`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_LOWER_ELEMENT] = { "expected a string in a `%w` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_LOWER_TERM] = { "unterminated list; expected a closing delimiter for the `%w`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_UPPER_ELEMENT] = { "expected a string in a `%W` list", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_LIST_W_UPPER_TERM] = { "unterminated list; expected a closing delimiter for the `%W`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MALLOC_FAILED] = { "failed to allocate memory", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MIXED_ENCODING] = { "UTF-8 mixed within %s source", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MODULE_IN_METHOD] = { "unexpected module definition in method body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MODULE_NAME] = { "unexpected constant path after `module`; class/module name must be CONSTANT", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MODULE_TERM] = { "expected an `end` to close the `module` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MULTI_ASSIGN_MULTI_SPLATS] = { "multiple splats in multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST] = { "unexpected '%.*s' resulting in multiple splats in multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NESTING_TOO_DEEP] = { "nesting too deep", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NO_LOCAL_VARIABLE] = { "%.*s: no such local variable", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NON_ASSOCIATIVE_OPERATOR] = { "unexpected %s; %s is a non-associative operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NOT_EXPRESSION] = { "expected an expression after `not`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBER_LITERAL_UNDERSCORE] = { "number literal ending with a `_`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK] = { "numbered parameter is already used in inner block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_IT] = { "numbered parameters are not allowed when 'it' is already used", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_ORDINARY] = { "numbered parameters are not allowed when an ordinary parameter is defined", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK] = { "numbered parameter is already used in outer block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_OPERATOR_MULTI_ASSIGN] = { "unexpected operator for a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_OPERATOR_WRITE_ARGUMENTS] = { "unexpected operator after a call with arguments", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_OPERATOR_WRITE_BLOCK] = { "unexpected operator after a call with a block", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI] = { "unexpected multiple `**` splat parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_BLOCK_MULTI] = { "multiple block parameters; only one block is allowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_CIRCULAR] = { "circular argument reference - %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_FORWARDING_AFTER_REST] = { "... after rest argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_METHOD_NAME] = { "unexpected name for a parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NAME_DUPLICATED] = { "duplicated argument name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NO_DEFAULT] = { "expected a default value for the parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NO_DEFAULT_KW] = { "expected a default value for the keyword parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_NUMBERED_RESERVED] = { "%.2s is reserved for numbered parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_ORDER] = { "unexpected parameter order", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_SPLAT_MULTI] = { "unexpected multiple `*` splat parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_STAR] = { "unexpected parameter `*`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_UNEXPECTED_FWD] = { "unexpected `...` in parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = { "unexpected `,` in parameters", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PARAMETER_UNEXPECTED_NO_KW] = { "unexpected **nil; no keywords marker disallowed after keywords", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS] = { "unexpected multiple '*' rest patterns in an array pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_CAPTURE_DUPLICATE] = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE] = { "variable capture in alternative pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_IN] = { "expected a pattern expression after the `in` keyword", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_KEY] = { "expected a pattern expression after the key", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN] = { "expected a pattern expression after the `(` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PIN] = { "expected a pattern expression after the `^` pin operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE] = { "expected a pattern expression after the `|` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE] = { "expected a pattern expression after the range operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_EXPRESSION_AFTER_REST] = { "unexpected pattern expression after the `**` expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_FIND_MISSING_INNER] = { "find patterns need at least one required inner pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_IMPLICIT] = { "unexpected implicit hash in pattern; use '{' to delineate", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY] = { "unexpected %s; expected a key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_DUPLICATE] = { "duplicated key name", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_INTERPOLATED] = { "symbol literal with interpolation is not allowed", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_LABEL] = { "expected a label as the key in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_HASH_KEY_LOCALS] = { "key must be valid as local variables", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_IDENT_AFTER_HROCKET] = { "expected an identifier after the `=>` operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_LABEL_AFTER_COMMA] = { "expected a label after the `,` in the hash pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_REST] = { "unexpected rest pattern", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_TERM_BRACE] = { "expected a `}` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_TERM_BRACKET] = { "expected a `]` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PATTERN_TERM_PAREN] = { "expected a `)` to close the pattern expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN] = { "unexpected `||=` in a multiple assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH] = { "regexp encoding option '%c' differs from source encoding '%s'", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_ESCAPED_NON_ASCII_IN_UTF8] = { "escaped non ASCII character in UTF-8 regexp: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING] = { "incompatible character encoding: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_INVALID_CHAR_PROPERTY] = { "invalid character property name {%.*s}: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_INVALID_UNICODE_RANGE] = { "invalid Unicode range: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_NON_ESCAPED_MBC] = { "/.../n has a non escaped non ASCII character in non ASCII-8BIT script: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_PARSE_ERROR] = { "%s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_UNKNOWN_OPTIONS] = { "unknown regexp %s - %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_TERM] = { "unterminated regexp meets end of file; expected a closing delimiter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_REGEXP_UTF8_CHAR_NON_UTF8_REGEXP] = { "UTF-8 character in non UTF-8 regexp: /%.*s/", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_EXPRESSION] = { "expected a rescued expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_MODIFIER_VALUE] = { "expected a value after the `rescue` modifier", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_TERM] = { "expected a closing delimiter for the `rescue` clause", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RESCUE_VARIABLE] = { "expected an exception variable after `=>` in a rescue statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_RETURN_INVALID] = { "Invalid return in class/module body", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_SINGLETON_FOR_LITERALS] = { "cannot define singleton method for literals", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_ALIAS] = { "unexpected an `alias` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_POSTEXE_END] = { "unexpected an `END` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_PREEXE_BEGIN] = { "unexpected a `BEGIN` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STATEMENT_UNDEF] = { "unexpected an `undef` at a non-statement position", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_CONCATENATION] = { "expected a string for concatenation", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_INTERPOLATED_TERM] = { "unterminated string; expected a closing delimiter for the interpolated string", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_LITERAL_EOF] = { "unterminated string meets end of file", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_STRING_LITERAL_TERM] = { "unexpected %s, expected a string literal terminator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_SYMBOL_INVALID] = { "invalid symbol", PM_ERROR_LEVEL_SYNTAX }, /* TODO expected symbol? prism.c ~9719 */
+ [PM_ERR_SYMBOL_TERM_DYNAMIC] = { "unterminated quoted string; expected a closing delimiter for the dynamic symbol", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_SYMBOL_TERM_INTERPOLATED] = { "unterminated symbol; expected a closing delimiter for the interpolated symbol", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_TERNARY_COLON] = { "expected a `:` after the true expression of a ternary operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_TERNARY_EXPRESSION_FALSE] = { "expected an expression after `:` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_TERNARY_EXPRESSION_TRUE] = { "expected an expression after `?` in the ternary operator", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNARY_RECEIVER] = { "unexpected %s, expected a receiver for unary `%c`", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNARY_DISALLOWED] = { "unexpected %s; unary calls are not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNDEF_ARGUMENT] = { "invalid argument being passed to `undef`; expected a bare word, constant, or symbol argument", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_BLOCK_ARGUMENT] = { "block argument should not be given", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_INDEX_BLOCK] = { "unexpected block arg given in index assignment; blocks are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_INDEX_KEYWORDS] = { "unexpected keyword arg given in index assignment; keywords are not allowed in index assignment expressions", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_LABEL] = { "unexpected label", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_MULTI_WRITE] = { "unexpected multiple assignment; multiple assignment is not allowed in this context", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE] = { "unexpected %s; expected a default value for a parameter", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_RANGE_OPERATOR] = { "unexpected range operator; .. and ... are non-associative and cannot be chained", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_SAFE_NAVIGATION] = { "&. inside multiple assignment destination", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT] = { "unexpected %s, assuming it is closing the parent %s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNEXPECTED_TOKEN_IGNORE] = { "unexpected %s, ignoring it", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_UNTIL_TERM] = { "expected an `end` to close the `until` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_VOID_EXPRESSION] = { "unexpected void value expression", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WHILE_TERM] = { "expected an `end` to close the `while` statement", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WRITE_TARGET_IN_METHOD] = { "dynamic constant assignment", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WRITE_TARGET_READONLY] = { "Can't set variable %.*s", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_WRITE_TARGET_UNEXPECTED] = { "unexpected write target", PM_ERROR_LEVEL_SYNTAX },
+ [PM_ERR_XSTRING_TERM] = { "expected a closing delimiter for the `%x` or backtick string", PM_ERROR_LEVEL_SYNTAX },
+
+ /* Warnings */
+ [PM_WARN_AMBIGUOUS_BINARY_OPERATOR] = { "'%s' after local variable or literal is interpreted as binary operator even though it seems like %s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS] = { "ambiguous first argument; put parentheses or a space even after `-` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND] = { "ambiguous `&` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_PREFIX_STAR] = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR] = { "ambiguous `**` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_AMBIGUOUS_SLASH] = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_COMPARISON_AFTER_COMPARISON] = { "comparison '%.*s' after comparison", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_DOT_DOT_DOT_EOL] = { "... at EOL, should be parenthesized?", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_DUPLICATED_HASH_KEY] = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_DUPLICATED_WHEN_CLAUSE] = { "'when' clause on line %" PRIi32 " duplicates 'when' clause on line %" PRIi32 " and is ignored", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_EQUAL_IN_CONDITIONAL_3_3] = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_EQUAL_IN_CONDITIONAL] = { "found '= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_END_IN_METHOD] = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_IGNORED_FROZEN_STRING_LITERAL] = { "'frozen_string_literal' is ignored after any tokens", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_INDENTATION_MISMATCH] = { "mismatched indentations at '%.*s' with '%.*s' at %" PRIi32, PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_INTEGER_IN_FLIP_FLOP] = { "integer literal in flip-flop", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_INVALID_CHARACTER] = { "invalid character syntax; use %s%s%s", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_INVALID_MAGIC_COMMENT_VALUE] = { "invalid value for %.*s: %.*s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_INVALID_NUMBERED_REFERENCE] = { "'%.*s' is too big for a number variable, always nil", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_KEYWORD_EOL] = { "`%.*s` at the end of line without an expression", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_LITERAL_IN_CONDITION_DEFAULT] = { "%sliteral in %s", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_LITERAL_IN_CONDITION_VERBOSE] = { "%sliteral in %s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE] = { "'shareable_constant_value' is ignored unless in comment-only line", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_SHEBANG_CARRIAGE_RETURN] = { "shebang line ending with \\r may cause problems", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_UNEXPECTED_CARRIAGE_RETURN] = { "encountered \\r in middle of line, treated as a mere space", PM_WARNING_LEVEL_DEFAULT },
+ [PM_WARN_UNREACHABLE_STATEMENT] = { "statement not reached", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_UNUSED_LOCAL_VARIABLE] = { "assigned but unused variable - %.*s", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_VOID_STATEMENT] = { "possibly useless use of %.*s in void context", PM_WARNING_LEVEL_VERBOSE }
+};
+
+/**
+ * Get the human-readable name of the given diagnostic ID.
+ */
+static const char *
+pm_diagnostic_id_name(pm_diagnostic_id_t diag_id) {
+ switch (diag_id) {
+ <%- errors.each do |error| -%>
+ case PM_ERR_<%= error.name %>: return "<%= error.name.downcase %>";
+ <%- end -%>
+ <%- warnings.each do |warning| -%>
+ case PM_WARN_<%= warning.name %>: return "<%= warning.name.downcase %>";
+ <%- end -%>
+ }
+
+ assert(false && "unreachable");
+ return "";
+}
+
+static PRISM_INLINE const char *
+pm_diagnostic_id_message(pm_diagnostic_id_t diag_id) {
+ assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
+
+ const char *message = diagnostic_messages[diag_id].message;
+ assert(message);
+
+ return message;
+}
+
+static PRISM_INLINE uint8_t
+pm_diagnostic_id_level(pm_diagnostic_id_t diag_id) {
+ assert(diag_id < PM_DIAGNOSTIC_ID_MAX);
+
+ return (uint8_t) diagnostic_messages[diag_id].level;
+}
+
+/**
+ * Get the type of the given diagnostic.
+ */
+const char *
+pm_diagnostic_type(const pm_diagnostic_t *diagnostic) {
+ return pm_diagnostic_id_name(diagnostic->diag_id);
+}
+
+/**
+ * Get the location of the given diagnostic.
+ */
+pm_location_t
+pm_diagnostic_location(const pm_diagnostic_t *diagnostic) {
+ return diagnostic->location;
+}
+
+/**
+ * Get the message of the given diagnostic.
+ */
+const char *
+pm_diagnostic_message(const pm_diagnostic_t *diagnostic) {
+ return diagnostic->message;
+}
+
+/**
+ * Get the error level associated with the given diagnostic.
+ */
+pm_error_level_t
+pm_diagnostic_error_level(const pm_diagnostic_t *diagnostic) {
+ return (pm_error_level_t) pm_diagnostic_id_level(diagnostic->diag_id);
+}
+
+/**
+ * Get the warning level associated with the given diagnostic.
+ */
+pm_warning_level_t
+pm_diagnostic_warning_level(const pm_diagnostic_t *diagnostic) {
+ return (pm_warning_level_t) pm_diagnostic_id_level(diagnostic->diag_id);
+}
+
+/**
+ * Append an error to the given list of diagnostic.
+ */
+void
+pm_diagnostic_list_append(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
+ pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
+
+ *diagnostic = (pm_diagnostic_t) {
+ .location = { .start = start, .length = length },
+ .diag_id = diag_id,
+ .message = pm_diagnostic_id_message(diag_id),
+ .level = pm_diagnostic_id_level(diag_id)
+ };
+
+ pm_list_append(list, (pm_list_node_t *) diagnostic);
+}
+
+/**
+ * Append a diagnostic to the given list of diagnostics that is using a format
+ * string for its message.
+ */
+void
+pm_diagnostic_list_append_format(pm_arena_t *arena, pm_list_t *list, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id, ...) {
+ va_list arguments;
+ va_start(arguments, diag_id);
+
+ const char *format = pm_diagnostic_id_message(diag_id);
+ int result = vsnprintf(NULL, 0, format, arguments);
+ va_end(arguments);
+
+ if (result < 0) {
+ return;
+ }
+
+ pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) pm_arena_zalloc(arena, sizeof(pm_diagnostic_t), PRISM_ALIGNOF(pm_diagnostic_t));
+
+ size_t message_length = (size_t) (result + 1);
+ char *message = (char *) pm_arena_alloc(arena, message_length, 1);
+
+ va_start(arguments, diag_id);
+ vsnprintf(message, message_length, format, arguments);
+ va_end(arguments);
+
+ *diagnostic = (pm_diagnostic_t) {
+ .location = { .start = start, .length = length },
+ .diag_id = diag_id,
+ .message = message,
+ .level = pm_diagnostic_id_level(diag_id)
+ };
+
+ pm_list_append(list, (pm_list_node_t *) diagnostic);
+}
diff --git a/prism/templates/src/json.c.erb b/prism/templates/src/json.c.erb
new file mode 100644
index 0000000000..5c4ab8d92a
--- /dev/null
+++ b/prism/templates/src/json.c.erb
@@ -0,0 +1,130 @@
+#include "prism/json.h"
+
+// Ensure this translation unit is never empty, even when JSON is excluded.
+typedef int pm_json_unused_t;
+
+#ifndef PRISM_EXCLUDE_JSON
+
+#include "prism/internal/buffer.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/parser.h"
+
+#include <inttypes.h>
+
+static void
+pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
+ const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON);
+ pm_buffer_append_byte(buffer, '"');
+}
+
+static void
+pm_dump_json_location(pm_buffer_t *buffer, const pm_location_t *location) {
+ pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"length\":%" PRIu32 "}", location->start, location->length);
+}
+
+/**
+ * Dump JSON to the given buffer.
+ */
+void
+pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
+ switch (PM_NODE_TYPE(node)) {
+ <%- nodes.each do |node| -%>
+ case <%= node.type %>: {
+ pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
+
+ const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+ pm_dump_json_location(buffer, &cast->base.location);
+ <%- [*node.flags, *node.fields].each_with_index do |field, index| -%>
+
+ // Dump the <%= field.name %> field
+ pm_buffer_append_byte(buffer, ',');
+ <%- if field.is_a?(Prism::Template::Flags) -%>
+ pm_buffer_append_string(buffer, "\"flags\":", 8);
+ <%- else -%>
+ pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
+ <%- end -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (cast-><%= field.name %> != NULL) {
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::NodeListField -%>
+ const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::StringField -%>
+ const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
+ pm_buffer_append_byte(buffer, '"');
+ <%- when Prism::Template::ConstantField -%>
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalConstantField -%>
+ if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::ConstantListField -%>
+ const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::LocationField -%>
+ pm_dump_json_location(buffer, &cast-><%= field.name %>);
+ <%- when Prism::Template::OptionalLocationField -%>
+ if (cast-><%= field.name %>.length != 0) {
+ pm_dump_json_location(buffer, &cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::Template::UInt8Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
+ <%- when Prism::Template::Flags -%>
+ size_t flags = 0;
+ pm_buffer_append_byte(buffer, '[');
+ <%- node.flags.values.each_with_index do |value, index| -%>
+ if (PM_NODE_FLAG_P(cast, PM_<%= node.flags.human.upcase %>_<%= value.name %>)) {
+ if (flags != 0) pm_buffer_append_byte(buffer, ',');
+ pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
+ flags++;
+ }
+ <%- end -%>
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::Template::IntegerField -%>
+ pm_integer_string(buffer, &cast-><%= field.name %>);
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_format(buffer, "%f", cast-><%= field.name %>);
+ <%- else -%>
+ <%- raise %>
+ <%- end -%>
+ <%- end -%>
+
+ pm_buffer_append_byte(buffer, '}');
+ break;
+ }
+ <%- end -%>
+ case PM_SCOPE_NODE:
+ break;
+ }
+}
+
+#endif
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
new file mode 100644
index 0000000000..f51aff6e53
--- /dev/null
+++ b/prism/templates/src/node.c.erb
@@ -0,0 +1,166 @@
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+#include "prism/internal/node.h"
+
+#include "prism/internal/arena.h"
+
+#include <stdlib.h>
+
+/**
+ * Attempts to grow the node list to the next size. If there is already
+ * capacity in the list, this function does nothing. Otherwise it allocates a
+ * new array from the arena (abandon-and-copy strategy) and copies the existing
+ * data into it.
+ */
+static void
+pm_node_list_grow(pm_arena_t *arena, pm_node_list_t *list, size_t size) {
+ size_t requested_size = list->size + size;
+
+ // Guard against overflow on the addition.
+ if (requested_size < list->size) abort();
+
+ // If the requested size is within the existing capacity, return.
+ if (requested_size <= list->capacity) return;
+
+ // Otherwise, compute the next capacity by doubling.
+ size_t next_capacity = list->capacity == 0 ? 4 : list->capacity * 2;
+
+ // Guard against overflow on the doubling.
+ while (requested_size > next_capacity) {
+ if (next_capacity == 0) abort();
+ next_capacity *= 2;
+ }
+
+ // Allocate a new array from the arena (old array is abandoned).
+ pm_node_t **nodes = (pm_node_t **) pm_arena_alloc(arena, sizeof(pm_node_t *) * next_capacity, PRISM_ALIGNOF(pm_node_t *));
+
+ // Copy old data into the new array.
+ if (list->size > 0) {
+ memcpy(nodes, list->nodes, list->size * sizeof(pm_node_t *));
+ }
+
+ list->nodes = nodes;
+ list->capacity = next_capacity;
+}
+
+/**
+ * Slow path for pm_node_list_append: grow the list and append the node.
+ * Do not call directly - use pm_node_list_append instead.
+ */
+void
+pm_node_list_append_slow(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+ pm_node_list_grow(arena, list, 1);
+ list->nodes[list->size++] = node;
+}
+
+/**
+ * Prepend a new node onto the beginning of the node list.
+ */
+void
+pm_node_list_prepend(pm_arena_t *arena, pm_node_list_t *list, pm_node_t *node) {
+ pm_node_list_grow(arena, list, 1);
+ memmove(list->nodes + 1, list->nodes, list->size * sizeof(pm_node_t *));
+ list->nodes[0] = node;
+ list->size++;
+}
+
+/**
+ * Concatenate the given node list onto the end of the other node list.
+ */
+void
+pm_node_list_concat(pm_arena_t *arena, pm_node_list_t *list, pm_node_list_t *other) {
+ if (other->size > 0) {
+ pm_node_list_grow(arena, list, other->size);
+ memcpy(list->nodes + list->size, other->nodes, other->size * sizeof(pm_node_t *));
+ list->size += other->size;
+ }
+}
+
+/**
+ * Returns a string representation of the given node type.
+ */
+const char *
+pm_node_type(pm_node_type_t node_type)
+{
+ switch (node_type) {
+<%- nodes.each do |node| -%>
+ case <%= node.type %>:
+ return "<%= node.type %>";
+<%- end -%>
+ }
+ return "";
+}
+
+/**
+ * Visit each of the nodes in this subtree using the given visitor callback. The
+ * callback function will be called for each node in the subtree. If it returns
+ * false, then that node's children will not be visited. If it returns true,
+ * then the children will be visited. The data parameter is treated as an opaque
+ * pointer and is passed to the visitor callback for consumers to use as they
+ * see fit.
+ */
+void
+pm_visit_node(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
+ if (visitor(node, data)) pm_visit_child_nodes(node, visitor, data);
+}
+
+/**
+ * Visit the children of the given node with the given callback. This is the
+ * default behavior for walking the tree that is called from pm_visit_node if
+ * the callback returns true.
+ */
+void
+pm_visit_child_nodes(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
+ switch (PM_NODE_TYPE(node)) {
+ <%- nodes.each do |node| -%>
+ <%- if (fields = node.fields.select { |field| field.is_a?(Prism::Template::NodeField) || field.is_a?(Prism::Template::OptionalNodeField) || field.is_a?(Prism::Template::NodeListField) }).any? -%>
+ case <%= node.type %>: {
+ const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
+ <%- fields.each do |field| -%>
+
+ // Visit the <%= field.name %> field
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_visit_node((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (cast-><%= field.name %> != NULL) {
+ pm_visit_node((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+ }
+ <%- when Prism::Template::NodeListField -%>
+ const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ pm_visit_node(<%= field.name %>->nodes[index], visitor, data);
+ }
+ <%- end -%>
+ <%- end -%>
+
+ break;
+ }
+ <%- else -%>
+ case <%= node.type %>:
+ break;
+ <%- end -%>
+ <%- end -%>
+ case PM_SCOPE_NODE:
+ break;
+ }
+}
+<%- nodes.each do |node| -%>
+
+<%- params = node.fields.map(&:c_param) -%>
+/**
+ * Allocate and initialize a new <%= node.name %> node.
+ */
+pm_<%= node.human %>_t *
+pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>) {
+ pm_<%= node.human %>_t *node = (pm_<%= node.human %>_t *) pm_arena_alloc(arena, sizeof(pm_<%= node.human %>_t), PRISM_ALIGNOF(pm_<%= node.human %>_t));
+
+ *node = (pm_<%= node.human %>_t) {
+ .base = { .type = <%= node.type %>, .flags = flags, .node_id = node_id, .location = location }<%= node.fields.empty? ? "" : "," %>
+<%- node.fields.each_with_index do |field, index| -%>
+ .<%= field.name %> = <%= field.name %><%= index < node.fields.size - 1 ? "," : "" %>
+<%- end -%>
+ };
+
+ return node;
+}
+<%- end -%>
diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb
new file mode 100644
index 0000000000..f12531d934
--- /dev/null
+++ b/prism/templates/src/prettyprint.c.erb
@@ -0,0 +1,177 @@
+<%# encoding: ASCII -%>
+#include "prism/prettyprint.h"
+
+/* We optionally support pretty printing nodes. For systems that don't want or
+ * need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_PRETTYPRINT define. */
+#ifdef PRISM_EXCLUDE_PRETTYPRINT
+
+/* Ensure this translation unit is never empty, even when prettyprint is
+ * excluded. */
+typedef int pm_prettyprint_unused_t;
+
+#else
+
+#include "prism/compiler/inline.h"
+#include "prism/internal/buffer.h"
+#include "prism/internal/constant_pool.h"
+#include "prism/internal/integer.h"
+#include "prism/internal/parser.h"
+#include "prism/line_offset_list.h"
+
+#include <inttypes.h>
+
+static PRISM_INLINE void
+prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
+ pm_line_column_t start = pm_line_offset_list_line_column(&parser->line_offsets, location->start, parser->start_line);
+ pm_line_column_t end = pm_line_offset_list_line_column(&parser->line_offsets, location->start + location->length, parser->start_line);
+ pm_buffer_append_format(output_buffer, "(%" PRIi32 ",%" PRIu32 ")-(%" PRIi32 ",%" PRIu32 ")", start.line, start.column, end.line, end.column);
+}
+
+static PRISM_INLINE void
+prettyprint_constant(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_constant_id_t constant_id) {
+ pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+ pm_buffer_append_format(output_buffer, ":%.*s", (int) constant->length, constant->start);
+}
+
+static void
+prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node, pm_buffer_t *prefix_buffer) {
+ switch (PM_NODE_TYPE(node)) {
+ case PM_SCOPE_NODE:
+ // We do not need to print a ScopeNode as it's not part of the AST.
+ return;
+ <%- nodes.each do |node| -%>
+ case <%= node.type %>: {
+ <%- if !node.flags.nil? || node.fields.any? -%>
+ pm_<%= node.human %>_t *cast = (pm_<%= node.human %>_t *) node;
+ <%- end -%>
+ pm_buffer_append_string(output_buffer, "@ <%= node.name %> (location: ", <%= node.name.length + 14 %>);
+ prettyprint_location(output_buffer, parser, &node->location);
+ pm_buffer_append_string(output_buffer, ")\n", 2);
+ <%- (fields = [*node.flags, *node.fields]).each_with_index do |field, index| -%>
+ <%- preadd = index == fields.length - 1 ? " " : "| " -%>
+
+ // <%= field.name %>
+ {
+ pm_buffer_concat(output_buffer, prefix_buffer);
+ pm_buffer_append_string(output_buffer, "+-- <%= field.name %>:", <%= 4 + field.name.length + 1 %>);
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_buffer_append_byte(output_buffer, '\n');
+
+ size_t prefix_length = prefix_buffer->length;
+ pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
+ pm_buffer_concat(output_buffer, prefix_buffer);
+ prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
+ prefix_buffer->length = prefix_length;
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (cast-><%= field.name %> == NULL) {
+ pm_buffer_append_string(output_buffer, " nil\n", 5);
+ } else {
+ pm_buffer_append_byte(output_buffer, '\n');
+
+ size_t prefix_length = prefix_buffer->length;
+ pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
+ pm_buffer_concat(output_buffer, prefix_buffer);
+ prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>, prefix_buffer);
+ prefix_buffer->length = prefix_length;
+ }
+ <%- when Prism::Template::StringField -%>
+ pm_buffer_append_string(output_buffer, " \"", 2);
+ pm_buffer_append_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>), PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_string(output_buffer, "\"\n", 2);
+ <%- when Prism::Template::NodeListField -%>
+ pm_buffer_append_format(output_buffer, " (length: %lu)\n", (unsigned long) (cast-><%= field.name %>.size));
+
+ size_t last_index = cast-><%= field.name %>.size;
+ for (uint32_t index = 0; index < last_index; index++) {
+ size_t prefix_length = prefix_buffer->length;
+ pm_buffer_append_string(prefix_buffer, "<%= preadd %>", 4);
+ pm_buffer_concat(output_buffer, prefix_buffer);
+ pm_buffer_append_string(output_buffer, "+-- ", 4);
+ pm_buffer_append_string(prefix_buffer, (index == last_index - 1) ? " " : "| ", 4);
+ prettyprint_node(output_buffer, parser, (pm_node_t *) cast-><%= field.name %>.nodes[index], prefix_buffer);
+ prefix_buffer->length = prefix_length;
+ }
+ <%- when Prism::Template::ConstantField -%>
+ pm_buffer_append_byte(output_buffer, ' ');
+ prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
+ pm_buffer_append_byte(output_buffer, '\n');
+ <%- when Prism::Template::OptionalConstantField -%>
+ if (cast-><%= field.name %> == 0) {
+ pm_buffer_append_string(output_buffer, " nil\n", 5);
+ } else {
+ pm_buffer_append_byte(output_buffer, ' ');
+ prettyprint_constant(output_buffer, parser, cast-><%= field.name %>);
+ pm_buffer_append_byte(output_buffer, '\n');
+ }
+ <%- when Prism::Template::ConstantListField -%>
+ pm_buffer_append_string(output_buffer, " [", 2);
+ for (uint32_t index = 0; index < cast-><%= field.name %>.size; index++) {
+ if (index != 0) pm_buffer_append_string(output_buffer, ", ", 2);
+ prettyprint_constant(output_buffer, parser, cast-><%= field.name %>.ids[index]);
+ }
+ pm_buffer_append_string(output_buffer, "]\n", 2);
+ <%- when Prism::Template::LocationField -%>
+ pm_location_t *location = &cast-><%= field.name %>;
+ pm_buffer_append_byte(output_buffer, ' ');
+ prettyprint_location(output_buffer, parser, location);
+ pm_buffer_append_string(output_buffer, " = \"", 4);
+ pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_string(output_buffer, "\"\n", 2);
+ <%- when Prism::Template::OptionalLocationField -%>
+ pm_location_t *location = &cast-><%= field.name %>;
+ if (location->length == 0) {
+ pm_buffer_append_string(output_buffer, " nil\n", 5);
+ } else {
+ pm_buffer_append_byte(output_buffer, ' ');
+ prettyprint_location(output_buffer, parser, location);
+ pm_buffer_append_string(output_buffer, " = \"", 4);
+ pm_buffer_append_source(output_buffer, parser->start + location->start, (size_t) location->length, PM_BUFFER_ESCAPING_RUBY);
+ pm_buffer_append_string(output_buffer, "\"\n", 2);
+ }
+ <%- when Prism::Template::UInt8Field -%>
+ pm_buffer_append_format(output_buffer, " %" PRIu8 "\n", cast-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+ pm_buffer_append_format(output_buffer, " %" PRIu32 "\n", cast-><%= field.name %>);
+ <%- when Prism::Template::Flags -%>
+ bool found = false;
+ <%- field.values.each do |value| -%>
+ if (cast->base.flags & PM_<%= field.human.upcase %>_<%= value.name %>) {
+ if (found) pm_buffer_append_byte(output_buffer, ',');
+ pm_buffer_append_string(output_buffer, " <%= value.name.downcase %>", <%= value.name.bytesize + 1 %>);
+ found = true;
+ }
+ <%- end -%>
+ if (!found) pm_buffer_append_string(output_buffer, " nil", 4);
+ pm_buffer_append_byte(output_buffer, '\n');
+ <%- when Prism::Template::IntegerField -%>
+ const pm_integer_t *integer = &cast-><%= field.name %>;
+ pm_buffer_append_byte(output_buffer, ' ');
+ pm_integer_string(output_buffer, integer);
+ pm_buffer_append_byte(output_buffer, '\n');
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_format(output_buffer, " %f\n", cast-><%= field.name %>);
+ <%- else -%>
+ <%- raise -%>
+ <%- end -%>
+ }
+ <%- end -%>
+
+ break;
+ }
+ <%- end -%>
+ }
+}
+
+/**
+ * Pretty-prints the AST represented by the given node to the given buffer.
+ */
+void
+pm_prettyprint(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_node_t *node) {
+ pm_buffer_t prefix_buffer = { 0 };
+ prettyprint_node(output_buffer, parser, node, &prefix_buffer);
+ pm_buffer_cleanup(&prefix_buffer);
+}
+
+#endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
new file mode 100644
index 0000000000..3d9811e5db
--- /dev/null
+++ b/prism/templates/src/serialize.c.erb
@@ -0,0 +1,404 @@
+#include "prism/excludes.h"
+
+/* We optionally support serializing to a binary string. For systems that do not
+ * want or need this functionality, it can be turned off with the
+ * PRISM_EXCLUDE_SERIALIZATION define. */
+#ifdef PRISM_EXCLUDE_SERIALIZATION
+
+/* Ensure this translation unit is never empty, even when serialization is
+ * excluded. */
+typedef int pm_serialize_unused_t;
+
+#else
+
+#include "prism/compiler/inline.h"
+
+#include "prism/internal/buffer.h"
+#include "prism/internal/comments.h"
+#include "prism/internal/diagnostic.h"
+#include "prism/internal/encoding.h"
+#include "prism/internal/list.h"
+#include "prism/internal/magic_comments.h"
+#include "prism/internal/options.h"
+#include "prism/internal/parser.h"
+
+#include "prism.h"
+#include "prism/ast.h"
+#include "prism/line_offset_list.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+static PRISM_INLINE uint32_t
+pm_ptrdifft_to_u32(ptrdiff_t value) {
+ assert(value >= 0 && ((unsigned long) value) < UINT32_MAX);
+ return (uint32_t) value;
+}
+
+static PRISM_INLINE uint32_t
+pm_sizet_to_u32(size_t value) {
+ assert(value < UINT32_MAX);
+ return (uint32_t) value;
+}
+
+static void
+pm_serialize_location(const pm_location_t *location, pm_buffer_t *buffer) {
+ pm_buffer_append_varuint(buffer, location->start);
+ pm_buffer_append_varuint(buffer, location->length);
+}
+
+static void
+pm_serialize_string(const pm_string_t *string, pm_buffer_t *buffer) {
+ uint32_t length = pm_sizet_to_u32(pm_string_length(string));
+ pm_buffer_append_varuint(buffer, length);
+ pm_buffer_append_bytes(buffer, pm_string_source(string), length);
+}
+
+static void
+pm_serialize_integer(const pm_integer_t *integer, pm_buffer_t *buffer) {
+ pm_buffer_append_byte(buffer, integer->negative ? 1 : 0);
+ if (integer->values == NULL) {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(1));
+ pm_buffer_append_varuint(buffer, integer->value);
+ } else {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(integer->length));
+ for (size_t i = 0; i < integer->length; i++) {
+ pm_buffer_append_varuint(buffer, integer->values[i]);
+ }
+ }
+}
+
+static void
+pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
+ pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
+
+ <%- if Prism::Template::INCLUDE_NODE_ID -%>
+ pm_buffer_append_varuint(buffer, node->node_id);
+ <%- end -%>
+ pm_serialize_location(&node->location, buffer);
+
+ switch (PM_NODE_TYPE(node)) {
+ // We do not need to serialize a ScopeNode ever as
+ // it is not part of the AST
+ case PM_SCOPE_NODE:
+ return;
+ <%- nodes.each do |node| -%>
+ case <%= node.type %>: {
+ <%- if node.needs_serialized_length? -%>
+ // serialize length
+ // encoding of location u32s make us need to save this offset.
+ size_t length_offset = buffer->length;
+ pm_buffer_append_string(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */
+ <%- end -%>
+ <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS && !node.flags -%>
+ pm_buffer_append_varuint(buffer, (uint32_t) node->flags);
+ <%- end -%>
+ <%- node.fields.each do |field| -%>
+ <%- case field -%>
+ <%- when Prism::Template::NodeField -%>
+ pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ <%- when Prism::Template::OptionalNodeField -%>
+ if (((pm_<%= node.human %>_t *)node)-><%= field.name %> == NULL) {
+ pm_buffer_append_byte(buffer, 0);
+ } else {
+ pm_serialize_node(parser, (pm_node_t *)((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ }
+ <%- when Prism::Template::StringField -%>
+ pm_serialize_string(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ <%- when Prism::Template::NodeListField -%>
+ uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
+ pm_buffer_append_varuint(buffer, <%= field.name %>_size);
+ for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
+ pm_serialize_node(parser, (pm_node_t *) ((pm_<%= node.human %>_t *)node)-><%= field.name %>.nodes[index], buffer);
+ }
+ <%- when Prism::Template::ConstantField, Prism::Template::OptionalConstantField -%>
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>));
+ <%- when Prism::Template::ConstantListField -%>
+ uint32_t <%= field.name %>_size = pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.size);
+ pm_buffer_append_varuint(buffer, <%= field.name %>_size);
+ for (uint32_t index = 0; index < <%= field.name %>_size; index++) {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
+ }
+ <%- when Prism::Template::LocationField -%>
+ <%- if field.should_be_serialized? -%>
+ pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ <%- end -%>
+ <%- when Prism::Template::OptionalLocationField -%>
+ <%- if field.should_be_serialized? -%>
+ if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) {
+ pm_buffer_append_byte(buffer, 0);
+ } else {
+ pm_buffer_append_byte(buffer, 1);
+ pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ }
+ <%- end -%>
+ <%- when Prism::Template::UInt8Field -%>
+ pm_buffer_append_byte(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
+ <%- when Prism::Template::UInt32Field -%>
+ pm_buffer_append_varuint(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
+ <%- when Prism::Template::IntegerField -%>
+ pm_serialize_integer(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
+ <%- when Prism::Template::DoubleField -%>
+ pm_buffer_append_double(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>);
+ <%- else -%>
+ <%- raise -%>
+ <%- end -%>
+ <%- end -%>
+ <%- if node.needs_serialized_length? -%>
+ // serialize length
+ uint32_t length = pm_sizet_to_u32(buffer->length - length_offset);
+ memcpy(buffer->value + length_offset, &length, sizeof(uint32_t));
+ <%- end -%>
+ break;
+ }
+ <%- end -%>
+ }
+}
+
+static void
+pm_serialize_line_offset_list(pm_line_offset_list_t *list, pm_buffer_t *buffer) {
+ uint32_t size = pm_sizet_to_u32(list->size);
+ pm_buffer_append_varuint(buffer, size);
+
+ for (uint32_t i = 0; i < size; i++) {
+ uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
+ pm_buffer_append_varuint(buffer, offset);
+ }
+}
+
+static void
+pm_serialize_comment(pm_comment_t *comment, pm_buffer_t *buffer) {
+ // serialize type
+ pm_buffer_append_byte(buffer, (uint8_t) comment->type);
+
+ // serialize location
+ pm_serialize_location(&comment->location, buffer);
+}
+
+/**
+ * Serialize the given list of comments to the given buffer.
+ */
+void
+pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer) {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
+
+ pm_comment_t *comment;
+ for (comment = (pm_comment_t *) list->head; comment != NULL; comment = (pm_comment_t *) comment->node.next) {
+ pm_serialize_comment(comment, buffer);
+ }
+}
+
+static void
+pm_serialize_magic_comment(pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
+ // serialize key location
+ pm_buffer_append_varuint(buffer, magic_comment->key.start);
+ pm_buffer_append_varuint(buffer, magic_comment->key.length);
+
+ // serialize value location
+ pm_buffer_append_varuint(buffer, magic_comment->value.start);
+ pm_buffer_append_varuint(buffer, magic_comment->value.length);
+}
+
+static void
+pm_serialize_magic_comment_list(pm_list_t *list, pm_buffer_t *buffer) {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
+
+ pm_magic_comment_t *magic_comment;
+ for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
+ pm_serialize_magic_comment(magic_comment, buffer);
+ }
+}
+
+static void
+pm_serialize_data_loc(const pm_parser_t *parser, pm_buffer_t *buffer) {
+ if (parser->data_loc.length == 0) {
+ pm_buffer_append_byte(buffer, 0);
+ } else {
+ pm_buffer_append_byte(buffer, 1);
+ pm_serialize_location(&parser->data_loc, buffer);
+ }
+}
+
+static void
+pm_serialize_diagnostic(pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
+ // serialize the type
+ pm_buffer_append_varuint(buffer, (uint32_t) diagnostic->diag_id);
+
+ // serialize message
+ size_t message_length = strlen(diagnostic->message);
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(message_length));
+ pm_buffer_append_string(buffer, diagnostic->message, message_length);
+
+ // serialize location
+ pm_serialize_location(&diagnostic->location, buffer);
+
+ pm_buffer_append_byte(buffer, diagnostic->level);
+}
+
+static void
+pm_serialize_diagnostic_list(pm_list_t *list, pm_buffer_t *buffer) {
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(pm_list_size(list)));
+
+ pm_diagnostic_t *diagnostic;
+ for (diagnostic = (pm_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
+ pm_serialize_diagnostic(diagnostic, buffer);
+ }
+}
+
+/**
+ * Serialize the name of the encoding to the buffer.
+ */
+void
+pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
+ size_t encoding_length = strlen(encoding->name);
+ pm_buffer_append_varuint(buffer, pm_sizet_to_u32(encoding_length));
+ pm_buffer_append_string(buffer, encoding->name, encoding_length);
+}
+
+static void
+pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
+ pm_serialize_encoding(parser->encoding, buffer);
+ pm_buffer_append_varsint(buffer, parser->start_line);
+ pm_serialize_line_offset_list(&parser->line_offsets, buffer);
+<%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
+ pm_serialize_comment_list(&parser->comment_list, buffer);
+<%- end -%>
+ pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer);
+ pm_serialize_data_loc(parser, buffer);
+ pm_serialize_diagnostic_list(&parser->error_list, buffer);
+ pm_serialize_diagnostic_list(&parser->warning_list, buffer);
+ pm_buffer_append_byte(buffer, (uint8_t) parser->continuable);
+}
+
+#line <%= __LINE__ + 1 %> "prism/templates/src/<%= File.basename(__FILE__) %>"
+/**
+ * Serialize the metadata, nodes, and constant pool.
+ */
+void
+pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
+ pm_serialize_metadata(parser, buffer);
+
+ // Here we're going to leave space for the offset of the constant pool in
+ // the buffer.
+ size_t offset = buffer->length;
+ pm_buffer_append_zeroes(buffer, 4);
+
+ // Next, encode the length of the constant pool.
+ pm_buffer_append_varuint(buffer, parser->constant_pool.size);
+
+ // Now we're going to serialize the content of the node.
+ pm_serialize_node(parser, node, buffer);
+
+ // Now we're going to serialize the offset of the constant pool back where
+ // we left space for it.
+ uint32_t length = pm_sizet_to_u32(buffer->length);
+ memcpy(buffer->value + offset, &length, sizeof(uint32_t));
+
+ // Now we're going to serialize the constant pool.
+ offset = buffer->length;
+ pm_buffer_append_zeroes(buffer, parser->constant_pool.size * 8);
+
+ for (uint32_t index = 0; index < parser->constant_pool.capacity; index++) {
+ pm_constant_pool_bucket_t *bucket = &parser->constant_pool.buckets[index];
+
+ // If we find a constant at this index, serialize it at the correct
+ // index in the buffer.
+ if (bucket->id != 0) {
+ pm_constant_t *constant = &parser->constant_pool.constants[bucket->id - 1];
+ size_t buffer_offset = offset + ((((size_t)bucket->id) - 1) * 8);
+
+ // Write the constant contents into the buffer after the constant
+ // pool. In place of the source offset, we store a buffer offset.
+ uint32_t content_offset = pm_sizet_to_u32(buffer->length);
+ memcpy(buffer->value + buffer_offset, &content_offset, 4);
+ pm_buffer_append_bytes(buffer, constant->start, constant->length);
+
+ uint32_t constant_length = pm_sizet_to_u32(constant->length);
+ memcpy(buffer->value + buffer_offset + 4, &constant_length, 4);
+ }
+ }
+}
+
+static void
+serialize_token(pm_parser_t *parser, pm_token_t *token, void *data) {
+ pm_buffer_t *buffer = (pm_buffer_t *) data;
+
+ pm_buffer_append_varuint(buffer, token->type);
+ pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(token->start - parser->start));
+ pm_buffer_append_varuint(buffer, pm_ptrdifft_to_u32(token->end - token->start));
+ pm_buffer_append_varuint(buffer, parser->lex_state);
+}
+
+/**
+ * Lex the given source and serialize to the given buffer.
+ */
+void
+pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
+ pm_options_t options = { 0 };
+ pm_options_read(&options, data);
+
+ pm_arena_t arena = { 0 };
+ pm_parser_t parser;
+ pm_parser_init(&arena, &parser, source, size, &options);
+
+ pm_parser_lex_callback_set(&parser, serialize_token, buffer);
+ pm_parse(&parser);
+
+ // Append 0 to mark end of tokens.
+ pm_buffer_append_byte(buffer, 0);
+
+ pm_serialize_metadata(&parser, buffer);
+
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
+}
+
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ */
+void
+pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
+ pm_options_t options = { 0 };
+ pm_options_read(&options, data);
+
+ pm_arena_t arena = { 0 };
+ pm_parser_t parser;
+ pm_parser_init(&arena, &parser, source, size, &options);
+
+ pm_parser_lex_callback_set(&parser, serialize_token, buffer);
+ pm_node_t *node = pm_parse(&parser);
+
+ pm_buffer_append_byte(buffer, 0);
+ pm_serialize(&parser, node, buffer);
+
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
+}
+
+/**
+ * Parse the source and return true if it parses without errors or warnings.
+ */
+bool
+pm_serialize_parse_success_p(const uint8_t *source, size_t size, const char *data) {
+ pm_options_t options = { 0 };
+ pm_options_read(&options, data);
+
+ pm_arena_t arena = { 0 };
+ pm_parser_t parser;
+ pm_parser_init(&arena, &parser, source, size, &options);
+
+ pm_parse(&parser);
+
+ bool result = parser.error_list.size == 0;
+ pm_parser_cleanup(&parser);
+ pm_arena_cleanup(&arena);
+ pm_options_cleanup(&options);
+
+ return result;
+}
+
+#endif
diff --git a/prism/templates/src/tokens.c.erb b/prism/templates/src/tokens.c.erb
new file mode 100644
index 0000000000..1e82954738
--- /dev/null
+++ b/prism/templates/src/tokens.c.erb
@@ -0,0 +1,367 @@
+#include "prism/ast.h"
+
+#include <assert.h>
+
+/**
+ * Returns a string representation of the given token type.
+ */
+const char *
+pm_token_type(pm_token_type_t token_type) {
+ switch (token_type) {
+<%- tokens.each do |token| -%>
+ case PM_TOKEN_<%= token.name %>:
+ return "<%= token.name %>";
+<%- end -%>
+ case PM_TOKEN_MAXIMUM:
+ assert(false && "unreachable");
+ return "";
+ }
+
+ // Provide a default, because some compilers can't determine that the above
+ // switch is exhaustive.
+ assert(false && "unreachable");
+ return "";
+}
+
+/**
+ * Returns the human name of the given token type.
+ */
+const char *
+pm_token_str(pm_token_type_t token_type) {
+ switch (token_type) {
+ case PM_TOKEN_EOF:
+ return "end-of-input";
+ case PM_TOKEN_AMPERSAND:
+ return "'&'";
+ case PM_TOKEN_AMPERSAND_AMPERSAND:
+ return "'&&'";
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
+ return "'&&='";
+ case PM_TOKEN_AMPERSAND_DOT:
+ return "'&.'";
+ case PM_TOKEN_AMPERSAND_EQUAL:
+ return "'&='";
+ case PM_TOKEN_BACKTICK:
+ return "'`'";
+ case PM_TOKEN_BACK_REFERENCE:
+ return "back reference";
+ case PM_TOKEN_BANG:
+ return "'!'";
+ case PM_TOKEN_BANG_EQUAL:
+ return "'!='";
+ case PM_TOKEN_BANG_TILDE:
+ return "'!~'";
+ case PM_TOKEN_BRACE_LEFT:
+ return "'{'";
+ case PM_TOKEN_BRACE_RIGHT:
+ return "'}'";
+ case PM_TOKEN_BRACKET_LEFT:
+ return "'['";
+ case PM_TOKEN_BRACKET_LEFT_ARRAY:
+ return "'['";
+ case PM_TOKEN_BRACKET_LEFT_RIGHT:
+ return "'[]'";
+ case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
+ return "'[]='";
+ case PM_TOKEN_BRACKET_RIGHT:
+ return "']'";
+ case PM_TOKEN_CARET:
+ return "'^'";
+ case PM_TOKEN_CARET_EQUAL:
+ return "'^='";
+ case PM_TOKEN_CHARACTER_LITERAL:
+ return "character literal";
+ case PM_TOKEN_CLASS_VARIABLE:
+ return "class variable";
+ case PM_TOKEN_COLON:
+ return "':'";
+ case PM_TOKEN_COLON_COLON:
+ return "'::'";
+ case PM_TOKEN_COMMA:
+ return "','";
+ case PM_TOKEN_COMMENT:
+ return "comment";
+ case PM_TOKEN_CONSTANT:
+ return "constant";
+ case PM_TOKEN_DOT:
+ return "'.'";
+ case PM_TOKEN_DOT_DOT:
+ return "..";
+ case PM_TOKEN_DOT_DOT_DOT:
+ return "...";
+ case PM_TOKEN_EMBDOC_BEGIN:
+ return "'=begin'";
+ case PM_TOKEN_EMBDOC_END:
+ return "'=end'";
+ case PM_TOKEN_EMBDOC_LINE:
+ return "embedded documentation line";
+ case PM_TOKEN_EMBEXPR_BEGIN:
+ return "'#{'";
+ case PM_TOKEN_EMBEXPR_END:
+ return "'}'";
+ case PM_TOKEN_EMBVAR:
+ return "'#'";
+ case PM_TOKEN_EQUAL:
+ return "'='";
+ case PM_TOKEN_EQUAL_EQUAL:
+ return "'=='";
+ case PM_TOKEN_EQUAL_EQUAL_EQUAL:
+ return "'==='";
+ case PM_TOKEN_EQUAL_GREATER:
+ return "'=>'";
+ case PM_TOKEN_EQUAL_TILDE:
+ return "'=~'";
+ case PM_TOKEN_FLOAT:
+ return "float";
+ case PM_TOKEN_FLOAT_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_FLOAT_RATIONAL:
+ return "rational";
+ case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_GLOBAL_VARIABLE:
+ return "global variable";
+ case PM_TOKEN_GREATER:
+ return "'>'";
+ case PM_TOKEN_GREATER_EQUAL:
+ return "'>='";
+ case PM_TOKEN_GREATER_GREATER:
+ return ">>";
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
+ return ">>=";
+ case PM_TOKEN_HEREDOC_END:
+ return "heredoc ending";
+ case PM_TOKEN_HEREDOC_START:
+ return "heredoc beginning";
+ case PM_TOKEN_IDENTIFIER:
+ return "local variable or method";
+ case PM_TOKEN_IGNORED_NEWLINE:
+ return "ignored newline";
+ case PM_TOKEN_INSTANCE_VARIABLE:
+ return "instance variable";
+ case PM_TOKEN_INTEGER:
+ return "integer";
+ case PM_TOKEN_INTEGER_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_INTEGER_RATIONAL:
+ return "rational";
+ case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
+ return "imaginary";
+ case PM_TOKEN_KEYWORD_ALIAS:
+ return "'alias'";
+ case PM_TOKEN_KEYWORD_AND:
+ return "'and'";
+ case PM_TOKEN_KEYWORD_BEGIN:
+ return "'begin'";
+ case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
+ return "'BEGIN'";
+ case PM_TOKEN_KEYWORD_BREAK:
+ return "'break'";
+ case PM_TOKEN_KEYWORD_CASE:
+ return "'case'";
+ case PM_TOKEN_KEYWORD_CLASS:
+ return "'class'";
+ case PM_TOKEN_KEYWORD_DEF:
+ return "'def'";
+ case PM_TOKEN_KEYWORD_DEFINED:
+ return "'defined?'";
+ case PM_TOKEN_KEYWORD_DO:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_DO_BLOCK:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_DO_LOOP:
+ return "'do'";
+ case PM_TOKEN_KEYWORD_ELSE:
+ return "'else'";
+ case PM_TOKEN_KEYWORD_ELSIF:
+ return "'elsif'";
+ case PM_TOKEN_KEYWORD_END:
+ return "'end'";
+ case PM_TOKEN_KEYWORD_END_UPCASE:
+ return "'END'";
+ case PM_TOKEN_KEYWORD_ENSURE:
+ return "'ensure'";
+ case PM_TOKEN_KEYWORD_FALSE:
+ return "'false'";
+ case PM_TOKEN_KEYWORD_FOR:
+ return "'for'";
+ case PM_TOKEN_KEYWORD_IF:
+ return "'if'";
+ case PM_TOKEN_KEYWORD_IF_MODIFIER:
+ return "'if'";
+ case PM_TOKEN_KEYWORD_IN:
+ return "'in'";
+ case PM_TOKEN_KEYWORD_MODULE:
+ return "'module'";
+ case PM_TOKEN_KEYWORD_NEXT:
+ return "'next'";
+ case PM_TOKEN_KEYWORD_NIL:
+ return "'nil'";
+ case PM_TOKEN_KEYWORD_NOT:
+ return "'not'";
+ case PM_TOKEN_KEYWORD_OR:
+ return "'or'";
+ case PM_TOKEN_KEYWORD_REDO:
+ return "'redo'";
+ case PM_TOKEN_KEYWORD_RESCUE:
+ return "'rescue'";
+ case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
+ return "'rescue' modifier";
+ case PM_TOKEN_KEYWORD_RETRY:
+ return "'retry'";
+ case PM_TOKEN_KEYWORD_RETURN:
+ return "'return'";
+ case PM_TOKEN_KEYWORD_SELF:
+ return "'self'";
+ case PM_TOKEN_KEYWORD_SUPER:
+ return "'super'";
+ case PM_TOKEN_KEYWORD_THEN:
+ return "'then'";
+ case PM_TOKEN_KEYWORD_TRUE:
+ return "'true'";
+ case PM_TOKEN_KEYWORD_UNDEF:
+ return "'undef'";
+ case PM_TOKEN_KEYWORD_UNLESS:
+ return "'unless'";
+ case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
+ return "'unless'";
+ case PM_TOKEN_KEYWORD_UNTIL:
+ return "'until'";
+ case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
+ return "'until'";
+ case PM_TOKEN_KEYWORD_WHEN:
+ return "'when'";
+ case PM_TOKEN_KEYWORD_WHILE:
+ return "'while'";
+ case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
+ return "'while'";
+ case PM_TOKEN_KEYWORD_YIELD:
+ return "'yield'";
+ case PM_TOKEN_KEYWORD___ENCODING__:
+ return "'__ENCODING__'";
+ case PM_TOKEN_KEYWORD___FILE__:
+ return "'__FILE__'";
+ case PM_TOKEN_KEYWORD___LINE__:
+ return "'__LINE__'";
+ case PM_TOKEN_LABEL:
+ return "label";
+ case PM_TOKEN_LABEL_END:
+ return "label terminator";
+ case PM_TOKEN_LAMBDA_BEGIN:
+ return "'{'";
+ case PM_TOKEN_LESS:
+ return "'<'";
+ case PM_TOKEN_LESS_EQUAL:
+ return "'<='";
+ case PM_TOKEN_LESS_EQUAL_GREATER:
+ return "'<=>'";
+ case PM_TOKEN_LESS_LESS:
+ return "<<";
+ case PM_TOKEN_LESS_LESS_EQUAL:
+ return "<<=";
+ case PM_TOKEN_METHOD_NAME:
+ return "method name";
+ case PM_TOKEN_MINUS:
+ return "'-'";
+ case PM_TOKEN_MINUS_EQUAL:
+ return "'-='";
+ case PM_TOKEN_MINUS_GREATER:
+ return "'->'";
+ case PM_TOKEN_NEWLINE:
+ return "newline";
+ case PM_TOKEN_NUMBERED_REFERENCE:
+ return "numbered reference";
+ case PM_TOKEN_PARENTHESIS_LEFT:
+ return "'('";
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+ return "'('";
+ case PM_TOKEN_PARENTHESIS_RIGHT:
+ return "')'";
+ case PM_TOKEN_PERCENT:
+ return "'%'";
+ case PM_TOKEN_PERCENT_EQUAL:
+ return "'%='";
+ case PM_TOKEN_PERCENT_LOWER_I:
+ return "'%i'";
+ case PM_TOKEN_PERCENT_LOWER_W:
+ return "'%w'";
+ case PM_TOKEN_PERCENT_LOWER_X:
+ return "'%x'";
+ case PM_TOKEN_PERCENT_UPPER_I:
+ return "'%I'";
+ case PM_TOKEN_PERCENT_UPPER_W:
+ return "'%W'";
+ case PM_TOKEN_PIPE:
+ return "'|'";
+ case PM_TOKEN_PIPE_EQUAL:
+ return "'|='";
+ case PM_TOKEN_PIPE_PIPE:
+ return "'||'";
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
+ return "'||='";
+ case PM_TOKEN_PLUS:
+ return "'+'";
+ case PM_TOKEN_PLUS_EQUAL:
+ return "'+='";
+ case PM_TOKEN_QUESTION_MARK:
+ return "'?'";
+ case PM_TOKEN_REGEXP_BEGIN:
+ return "regular expression beginning";
+ case PM_TOKEN_REGEXP_END:
+ return "regular expression ending";
+ case PM_TOKEN_SEMICOLON:
+ return "';'";
+ case PM_TOKEN_SLASH:
+ return "'/'";
+ case PM_TOKEN_SLASH_EQUAL:
+ return "'/='";
+ case PM_TOKEN_STAR:
+ return "'*'";
+ case PM_TOKEN_STAR_EQUAL:
+ return "'*='";
+ case PM_TOKEN_STAR_STAR:
+ return "'**'";
+ case PM_TOKEN_STAR_STAR_EQUAL:
+ return "'**='";
+ case PM_TOKEN_STRING_BEGIN:
+ return "string literal";
+ case PM_TOKEN_STRING_CONTENT:
+ return "string content";
+ case PM_TOKEN_STRING_END:
+ return "string ending";
+ case PM_TOKEN_SYMBOL_BEGIN:
+ return "symbol literal";
+ case PM_TOKEN_TILDE:
+ return "'~'";
+ case PM_TOKEN_UAMPERSAND:
+ return "'&'";
+ case PM_TOKEN_UCOLON_COLON:
+ return "'::'";
+ case PM_TOKEN_UDOT_DOT:
+ return "'..'";
+ case PM_TOKEN_UDOT_DOT_DOT:
+ return "'...'";
+ case PM_TOKEN_UMINUS:
+ return "'-'";
+ case PM_TOKEN_UMINUS_NUM:
+ return "'-'";
+ case PM_TOKEN_UPLUS:
+ return "'+'";
+ case PM_TOKEN_USTAR:
+ return "*";
+ case PM_TOKEN_USTAR_STAR:
+ return "**";
+ case PM_TOKEN_WORDS_SEP:
+ return "string separator";
+ case PM_TOKEN___END__:
+ return "'__END__'";
+ case PM_TOKEN_MAXIMUM:
+ assert(false && "unreachable");
+ return "";
+ }
+
+ /* Provide a default, because some compilers cannot determine that the above
+ * switch is exhaustive. */
+ assert(false && "unreachable");
+ return "";
+}
diff --git a/prism/templates/template.rb b/prism/templates/template.rb
new file mode 100755
index 0000000000..0fdeda561f
--- /dev/null
+++ b/prism/templates/template.rb
@@ -0,0 +1,723 @@
+#!/usr/bin/env ruby
+# typed: ignore
+
+require "erb"
+require "fileutils"
+require "yaml"
+
+module Prism
+ module Template # :nodoc: all
+ SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)
+ CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false)
+
+ JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default"
+ JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]"
+ INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby"
+
+ COMMON_FLAGS_COUNT = 2
+
+ class Error
+ attr_reader :name
+
+ def initialize(name)
+ @name = name
+ end
+ end
+
+ class Warning
+ attr_reader :name
+
+ def initialize(name)
+ @name = name
+ end
+ end
+
+ # This module contains methods for escaping characters in JavaDoc comments.
+ module JavaDoc
+ ESCAPES = {
+ "'" => "&#39;",
+ "\"" => "&quot;",
+ "@" => "&#64;",
+ "&" => "&amp;",
+ "<" => "&lt;",
+ ">" => "&gt;"
+ }.freeze
+
+ def self.escape(value)
+ value.gsub(/['&"<>@]/, ESCAPES)
+ end
+ end
+
+ # This module contains methods for escaping characters in Doxygen comments.
+ module Doxygen
+ # Similar to /verbatim ... /endverbatim but doesn't wrap the result in a code block.
+ def self.verbatim(value)
+ value.gsub(/[*%!`#<>_+@-]/, '\\\\\0')
+ end
+ end
+
+ # A comment attached to a field or node.
+ class ConfigComment
+ attr_reader :value
+
+ def initialize(value)
+ @value = value
+ end
+
+ def each_line(&block)
+ value.each_line { |line| yield line.prepend(" ").rstrip }
+ end
+
+ def each_java_line(&block)
+ ConfigComment.new(JavaDoc.escape(value)).each_line(&block)
+ end
+ end
+
+ # This represents a field on a node. It contains all of the necessary
+ # information to template out the code for that field.
+ class Field
+ attr_reader :name, :comment, :options
+
+ def initialize(name:, comment: nil, **options)
+ @name = name
+ @comment = comment
+ @options = options
+ end
+
+ def each_comment_line(&block)
+ ConfigComment.new(comment).each_line(&block) if comment
+ end
+
+ def each_comment_java_line(&block)
+ ConfigComment.new(comment).each_java_line(&block) if comment
+ end
+
+ def semantic_field?
+ true
+ end
+
+ def should_be_serialized?
+ SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true
+ end
+ end
+
+ # Some node fields can be specialized if they point to a specific kind of
+ # node and not just a generic node.
+ class NodeKindField < Field
+ # The C type to use for this field as a function parameter.
+ def c_param
+ "struct #{c_type} *#{name}"
+ end
+
+ def initialize(kind:, **options)
+ @kind = kind
+ super(**options)
+ end
+
+ def c_type
+ if specific_kind
+ "pm_#{specific_kind.gsub(/(?<=.)[A-Z]/, "_\\0").downcase}"
+ else
+ "pm_node"
+ end
+ end
+
+ def ruby_type
+ specific_kind || "Node"
+ end
+
+ def java_type
+ specific_kind || "Node"
+ end
+
+ def java_cast
+ if specific_kind
+ "(Nodes.#{@kind}) "
+ else
+ ""
+ end
+ end
+
+ def specific_kind
+ @kind unless @kind.is_a?(Array)
+ end
+
+ def union_kind
+ @kind if @kind.is_a?(Array)
+ end
+ end
+
+ # This represents a field on a node that is itself a node. We pass them as
+ # references and store them as references.
+ class NodeField < NodeKindField
+ def rbs_class
+ if specific_kind
+ specific_kind
+ elsif union_kind
+ "(#{union_kind.join(" | ")})"
+ else
+ "Prism::node"
+ end
+ end
+
+ def call_seq_type
+ if specific_kind
+ specific_kind
+ elsif union_kind
+ union_kind.join(" | ")
+ else
+ "Node"
+ end
+ end
+
+ def check_field_kind
+ if union_kind
+ "[#{union_kind.join(', ')}, ErrorRecoveryNode].include?(#{name}.class)"
+ else
+ "#{name}.is_a?(#{ruby_type}) || #{name}.is_a?(ErrorRecoveryNode)"
+ end
+ end
+ end
+
+ # This represents a field on a node that is itself a node and can be
+ # optionally null. We pass them as references and store them as references.
+ class OptionalNodeField < NodeKindField
+ def rbs_class
+ if specific_kind
+ "#{specific_kind}?"
+ elsif union_kind
+ "(#{union_kind.join(" | ")})?"
+ else
+ "Prism::node?"
+ end
+ end
+
+ def call_seq_type
+ if specific_kind
+ "#{specific_kind} | nil"
+ elsif union_kind
+ [*union_kind, "nil"].join(" | ")
+ else
+ "Node | nil"
+ end
+ end
+
+ def check_field_kind
+ if union_kind
+ "[#{union_kind.join(', ')}, ErrorRecoveryNode, NilClass].include?(#{name}.class)"
+ else
+ "#{name}.nil? || #{name}.is_a?(#{ruby_type}) || #{name}.is_a?(ErrorRecoveryNode)"
+ end
+ end
+ end
+
+ # This represents a field on a node that is a list of nodes. We pass them as
+ # references and store them directly on the struct.
+ class NodeListField < NodeKindField
+ def c_param
+ "pm_node_list_t #{name}"
+ end
+
+ def element_rbs_class
+ if specific_kind
+ "#{specific_kind}"
+ elsif union_kind
+ "#{union_kind.join(" | ")}"
+ else
+ "Prism::node"
+ end
+ end
+
+ def rbs_class
+ "Array[#{element_rbs_class}]"
+ end
+
+ def call_seq_type
+ if specific_kind
+ "Array[#{specific_kind}]"
+ elsif union_kind
+ "Array[#{union_kind.join(" | ")}]"
+ else
+ "Array[Node]"
+ end
+ end
+
+ def java_type
+ "#{super}[]"
+ end
+
+ def check_field_kind
+ if union_kind
+ "#{name}.all? { |n| [#{union_kind.join(', ')}, ErrorRecoveryNode].include?(n.class) }"
+ else
+ "#{name}.all? { |n| n.is_a?(#{ruby_type}) || n.is_a?(ErrorRecoveryNode) }"
+ end
+ end
+ end
+
+ # This represents a field on a node that is the ID of a string interned
+ # through the parser's constant pool.
+ class ConstantField < Field
+ def c_param
+ "pm_constant_id_t #{name}"
+ end
+
+ def rbs_class
+ "Symbol"
+ end
+
+ def call_seq_type
+ "Symbol"
+ end
+
+ def java_type
+ JAVA_IDENTIFIER_TYPE
+ end
+ end
+
+ # This represents a field on a node that is the ID of a string interned
+ # through the parser's constant pool and can be optionally null.
+ class OptionalConstantField < Field
+ def c_param
+ "pm_constant_id_t #{name}"
+ end
+
+ def rbs_class
+ "Symbol?"
+ end
+
+ def call_seq_type
+ "Symbol | nil"
+ end
+
+ def java_type
+ JAVA_IDENTIFIER_TYPE
+ end
+ end
+
+ # This represents a field on a node that is a list of IDs that are associated
+ # with strings interned through the parser's constant pool.
+ class ConstantListField < Field
+ def c_param
+ "pm_constant_id_list_t #{name}"
+ end
+
+ def rbs_class
+ "Array[Symbol]"
+ end
+
+ def call_seq_type
+ "Array[Symbol]"
+ end
+
+ def java_type
+ "#{JAVA_IDENTIFIER_TYPE}[]"
+ end
+ end
+
+ # This represents a field on a node that is a string.
+ class StringField < Field
+ def c_param
+ "pm_string_t #{name}"
+ end
+
+ def rbs_class
+ "String"
+ end
+
+ def call_seq_type
+ "String"
+ end
+
+ def java_type
+ "byte[]"
+ end
+ end
+
+ # This represents a field on a node that is a location.
+ class LocationField < Field
+ def c_param
+ "pm_location_t #{name}"
+ end
+
+ def semantic_field?
+ false
+ end
+
+ def rbs_class
+ "Location"
+ end
+
+ def call_seq_type
+ "Location"
+ end
+
+ def java_type
+ "Location"
+ end
+ end
+
+ # This represents a field on a node that is a location that is optional.
+ class OptionalLocationField < Field
+ def c_param
+ "pm_location_t #{name}"
+ end
+
+ def semantic_field?
+ false
+ end
+
+ def rbs_class
+ "Location?"
+ end
+
+ def call_seq_type
+ "Location | nil"
+ end
+
+ def java_type
+ "Location"
+ end
+ end
+
+ # This represents an integer field.
+ class UInt8Field < Field
+ def c_param
+ "uint8_t #{name}"
+ end
+
+ def rbs_class
+ "Integer"
+ end
+
+ def call_seq_type
+ "Integer"
+ end
+
+ def java_type
+ "int"
+ end
+ end
+
+ # This represents an integer field.
+ class UInt32Field < Field
+ def c_param
+ "uint32_t #{name}"
+ end
+
+ def rbs_class
+ "Integer"
+ end
+
+ def call_seq_type
+ "Integer"
+ end
+
+ def java_type
+ "int"
+ end
+ end
+
+ # This represents an arbitrarily-sized integer. When it gets to Ruby it will
+ # be an Integer.
+ class IntegerField < Field
+ def c_param
+ "pm_integer_t #{name}"
+ end
+
+ def rbs_class
+ "Integer"
+ end
+
+ def call_seq_type
+ "Integer"
+ end
+
+ def java_type
+ "Object"
+ end
+ end
+
+ # This represents a double-precision floating point number. When it gets to
+ # Ruby it will be a Float.
+ class DoubleField < Field
+ def c_param
+ "double #{name}"
+ end
+
+ def rbs_class
+ "Float"
+ end
+
+ def call_seq_type
+ "Float"
+ end
+
+ def java_type
+ "double"
+ end
+ end
+
+ # This class represents a node in the tree, configured by the config.yml file
+ # in YAML format. It contains information about the name of the node and the
+ # various child nodes it contains.
+ class NodeType
+ attr_reader :name, :type, :human, :flags, :fields, :newline, :comment
+
+ def initialize(config, flags)
+ @name = config.fetch("name")
+
+ type = @name.gsub(/(?<=.)[A-Z]/, "_\\0")
+ @type = "PM_#{type.upcase}"
+ @human = type.downcase
+
+ @fields =
+ config.fetch("fields", []).map do |field|
+ type = field_type_for(field.fetch("type"))
+
+ options = field.transform_keys(&:to_sym)
+ options.delete(:type)
+
+ # If/when we have documentation on every field, this should be
+ # changed to use fetch instead of delete.
+ comment = options.delete(:comment)
+
+ if kinds = options[:kind]
+ kinds = [kinds] unless kinds.is_a?(Array)
+ kinds = kinds.map do |kind|
+ case kind
+ when "non-void expression"
+ # the actual list of types would be way too long
+ "Node"
+ when "pattern expression"
+ # the list of all possible types is too long with 37+ different classes
+ "Node"
+ else
+ kind
+ end
+ end.compact
+ if kinds.size == 1
+ kinds = kinds.first
+ kinds = nil if kinds == "Node"
+ end
+ options[:kind] = kinds
+ else
+ if type < NodeKindField
+ raise "Missing kind in config.yml for field #{@name}##{options.fetch(:name)}"
+ end
+ end
+
+ type.new(comment: comment, **options)
+ end
+
+ @flags = config.key?("flags") ? flags.fetch(config.fetch("flags")) : nil
+ @newline = config.fetch("newline", true)
+ @comment = config.fetch("comment")
+ end
+
+ def each_comment_line(&block)
+ ConfigComment.new(comment).each_line(&block)
+ end
+
+ def each_comment_java_line(&block)
+ ConfigComment.new(comment).each_java_line(&block)
+ end
+
+ def semantic_fields
+ @semantic_fields ||= @fields.select(&:semantic_field?)
+ end
+
+ # Should emit serialized length of node so implementations can skip
+ # the node to enable lazy parsing.
+ def needs_serialized_length?
+ name == "DefNode"
+ end
+
+ private
+
+ def field_type_for(name)
+ case name
+ when "node" then NodeField
+ when "node?" then OptionalNodeField
+ when "node[]" then NodeListField
+ when "string" then StringField
+ when "constant" then ConstantField
+ when "constant?" then OptionalConstantField
+ when "constant[]" then ConstantListField
+ when "location" then LocationField
+ when "location?" then OptionalLocationField
+ when "uint8" then UInt8Field
+ when "uint32" then UInt32Field
+ when "integer" then IntegerField
+ when "double" then DoubleField
+ else raise("Unknown field type: #{name.inspect}")
+ end
+ end
+ end
+
+ # This represents a token in the lexer.
+ class Token
+ attr_reader :name, :value, :comment
+
+ def initialize(config)
+ @name = config.fetch("name")
+ @value = config["value"]
+ @comment = config.fetch("comment")
+ end
+ end
+
+ # Represents a set of flags that should be internally represented with an enum.
+ class Flags
+ # Represents an individual flag within a set of flags.
+ class Flag
+ attr_reader :name, :camelcase, :comment
+
+ def initialize(config)
+ @name = config.fetch("name")
+ @camelcase = @name.split("_").map(&:capitalize).join
+ @comment = config.fetch("comment")
+ end
+ end
+
+ attr_reader :name, :human, :values, :comment
+
+ def initialize(config)
+ @name = config.fetch("name")
+ @human = @name.gsub(/(?<=.)[A-Z]/, "_\\0").downcase
+ @values = config.fetch("values").map { |flag| Flag.new(flag) }
+ @comment = config.fetch("comment")
+ end
+
+ def self.empty
+ new("name" => "", "values" => [], "comment" => "")
+ end
+ end
+
+ class << self
+ # This templates out a file using ERB with the given locals. The locals are
+ # derived from the config.yml file.
+ def render(name, write_to: nil)
+ filepath = "templates/#{name}.erb"
+ template = File.expand_path("../#{filepath}", __dir__)
+
+ erb = read_template(template)
+ extension = File.extname(filepath.gsub(".erb", ""))
+
+ heading =
+ if extension == ".rb"
+ <<~HEADING
+ # frozen_string_literal: true
+ # :markup: markdown
+
+ =begin
+ --
+ This file is generated by the templates/template.rb script and should not be
+ modified manually. See #{filepath}
+ if you are looking to modify the template
+ ++
+ =end
+
+ HEADING
+ else
+ <<~HEADING
+ /*----------------------------------------------------------------------------*/
+ /* This file is generated by the templates/template.rb script and should not */
+ /* be modified manually. See */
+ /* #{filepath.ljust(74)} */
+ /* if you are looking to modify the */
+ /* template */
+ /*----------------------------------------------------------------------------*/
+
+ HEADING
+ end
+
+ write_to ||= File.expand_path("../#{name}", __dir__)
+ contents = heading + erb.result_with_hash(locals)
+
+ if (extension == ".c" || extension == ".h") && !contents.ascii_only?
+ # Enforce that we only have ASCII characters here. This is necessary
+ # for non-UTF-8 locales that only allow ASCII characters in C source
+ # files.
+ contents.each_line.with_index(1) do |line, line_number|
+ raise "Non-ASCII character on line #{line_number} of #{write_to}" unless line.ascii_only?
+ end
+ end
+
+ begin
+ FileUtils.mkdir_p(File.dirname(write_to))
+ File.write(write_to, contents)
+ rescue SystemCallError # EACCES, EPERM, EROFS, etc.
+ # Fall back to the current directory
+ FileUtils.mkdir_p(File.dirname(name))
+ File.write(name, contents)
+ end
+ end
+
+ private
+
+ def read_template(filepath)
+ template = File.read(filepath, encoding: Encoding::UTF_8)
+ erb = erb(template)
+ erb.filename = filepath
+ erb
+ end
+
+ def erb(template)
+ ERB.new(template, trim_mode: "-")
+ end
+
+ def locals
+ @locals ||=
+ begin
+ config = YAML.load_file(File.expand_path("../config.yml", __dir__))
+ flags = config.fetch("flags").to_h { |flags| [flags["name"], Flags.new(flags)] }
+
+ {
+ errors: config.fetch("errors").map { |name| Error.new(name) },
+ warnings: config.fetch("warnings").map { |name| Warning.new(name) },
+ nodes: config.fetch("nodes").map { |node| NodeType.new(node, flags) }.sort_by(&:name),
+ tokens: config.fetch("tokens").map { |token| Token.new(token) },
+ flags: flags.values
+ }
+ end
+ end
+ end
+
+ TEMPLATES = [
+ "ext/prism/api_node.c",
+ "include/prism/ast.h",
+ "include/prism/internal/diagnostic.h",
+ "javascript/src/deserialize.js",
+ "javascript/src/nodes.js",
+ "javascript/src/visitor.js",
+ "java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java",
+ "java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java",
+ "java/api/src/main/java-templates/org/ruby_lang/prism/AbstractNodeVisitor.java",
+ "lib/prism/compiler.rb",
+ "lib/prism/dispatcher.rb",
+ "lib/prism/dot_visitor.rb",
+ "lib/prism/dsl.rb",
+ "lib/prism/inspect_visitor.rb",
+ "lib/prism/mutation_compiler.rb",
+ "lib/prism/node.rb",
+ "lib/prism/reflection.rb",
+ "lib/prism/serialize.rb",
+ "lib/prism/visitor.rb",
+ "src/diagnostic.c",
+ "src/json.c",
+ "src/node.c",
+ "src/prettyprint.c",
+ "src/serialize.c",
+ "src/tokens.c"
+ ]
+ end
+end
+
+if __FILE__ == $0
+ if ARGV.empty?
+ Prism::Template::TEMPLATES.each { |filepath| Prism::Template.render(filepath) }
+ else # ruby/ruby
+ name, write_to = ARGV
+ Prism::Template.render(name, write_to: write_to)
+ end
+end