diff options
Diffstat (limited to 'prism/prism.c')
| -rw-r--r-- | prism/prism.c | 13029 |
1 files changed, 6413 insertions, 6616 deletions
diff --git a/prism/prism.c b/prism/prism.c index cac9832ab6..a2e04ed106 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -1,4 +1,90 @@ -#include "prism.h" +#include "prism/compiler/accel.h" +#include "prism/compiler/fallthrough.h" +#include "prism/compiler/unused.h" + +#include "prism/internal/allocator.h" +#include "prism/internal/arena.h" +#include "prism/internal/bit.h" +#include "prism/internal/buffer.h" +#include "prism/internal/char.h" +#include "prism/internal/comments.h" +#include "prism/internal/constant_pool.h" +#include "prism/internal/diagnostic.h" +#include "prism/internal/encoding.h" +#include "prism/internal/integer.h" +#include "prism/internal/isinf.h" +#include "prism/internal/line_offset_list.h" +#include "prism/internal/list.h" +#include "prism/internal/magic_comments.h" +#include "prism/internal/memchr.h" +#include "prism/internal/node.h" +#include "prism/internal/options.h" +#include "prism/internal/parser.h" +#include "prism/internal/regexp.h" +#include "prism/internal/serialize.h" +#include "prism/internal/source.h" +#include "prism/internal/static_literals.h" +#include "prism/internal/stringy.h" +#include "prism/internal/strncasecmp.h" +#include "prism/internal/strpbrk.h" +#include "prism/internal/tokens.h" + +#include "prism/excludes.h" +#include "prism/serialize.h" +#include "prism/stream.h" +#include "prism/version.h" + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +/** + * When we are parsing using recursive descent, we want to protect against + * malicious payloads that could attempt to crash our parser. We do this by + * specifying a maximum depth to which we are allowed to recurse. + */ +#ifndef PRISM_DEPTH_MAXIMUM + #define PRISM_DEPTH_MAXIMUM 10000 +#endif + +/** + * A simple utility macro to concatenate two tokens together, necessary when one + * of the tokens is itself a macro. + */ +#define PM_CONCATENATE(left, right) left ## right + +/** + * We want to be able to use static assertions, but they weren't standardized + * until C11. As such, we polyfill it here by making a hacky typedef that will + * fail to compile due to a negative array size if the condition is false. + */ +#if defined(_Static_assert) +# define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message) +#else +# define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1] +#endif + +/** + * Support PRISM_LIKELY and PRISM_UNLIKELY to help the compiler optimize its + * branch predication. + */ +#if defined(__GNUC__) || defined(__clang__) + /** The compiler should predicate that this branch will be taken. */ + #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1) + + /** The compiler should predicate that this branch will not be taken. */ + #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else + /** Void because this platform does not support branch prediction hints. */ + #define PRISM_LIKELY(x) (x) + + /** Void because this platform does not support branch prediction hints. */ + #define PRISM_UNLIKELY(x) (x) +#endif /** * The prism version and the serialization format. @@ -19,6 +105,51 @@ pm_version(void) { #define MAX(a,b) (((a)>(b))?(a):(b)) /******************************************************************************/ +/* Helpful AST-related macros */ +/******************************************************************************/ + +#define U32(value_) ((uint32_t) (value_)) + +#define FL PM_NODE_FLAGS +#define UP PM_NODE_UPCAST + +#define PM_LOCATION_START(location_) ((location_)->start) +#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length) + +#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start) +#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start) +#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start) +#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start) + +#define PM_NODE_START(node_) (UP(node_)->location.start) +#define PM_NODE_LENGTH(node_) (UP(node_)->location.length) +#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length) +#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_)) + +#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_)) +#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_)) + +#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_)) +#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_)) +#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_)) +#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_)) +#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_)) + +#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) }) +#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0) +#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_)) +#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location + +#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_)) +#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_)) +#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_)) +#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_)) + +#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_) +#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_)) +#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_)) + +/******************************************************************************/ /* Lex mode manipulations */ /******************************************************************************/ @@ -26,7 +157,7 @@ pm_version(void) { * Returns the incrementor character that should be used to increment the * nesting count if one is possible. */ -static inline uint8_t +static PRISM_INLINE uint8_t lex_mode_incrementor(const uint8_t start) { switch (start) { case '(': @@ -43,7 +174,7 @@ lex_mode_incrementor(const uint8_t start) { * Returns the matching character that should be used to terminate a list * beginning with the given character. */ -static inline uint8_t +static PRISM_INLINE uint8_t lex_mode_terminator(const uint8_t start) { switch (start) { case '(': @@ -85,7 +216,7 @@ lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) { /** * Push on a new list lex mode. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) { uint8_t incrementor = lex_mode_incrementor(delimiter); uint8_t terminator = lex_mode_terminator(delimiter); @@ -103,7 +234,8 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) { // These are the places where we need to split up the content of the list. // We'll use strpbrk to find the first of these characters. uint8_t *breakpoints = lex_mode.as.list.breakpoints; - memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints)); + memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE); + memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1); size_t index = 7; // Now we'll add the terminator to the list of breakpoints. If the @@ -132,7 +264,7 @@ lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) { * called when we're at the end of the file. We want the parser to be able to * perform its normal error tolerance. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_list_eof(pm_parser_t *parser) { return lex_mode_push_list(parser, false, '\0'); } @@ -140,7 +272,7 @@ lex_mode_push_list_eof(pm_parser_t *parser) { /** * Push on a new regexp lex mode. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) { pm_lex_mode_t lex_mode = { .mode = PM_LEX_REGEXP, @@ -155,7 +287,8 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato // regular expression. We'll use strpbrk to find the first of these // characters. uint8_t *breakpoints = lex_mode.as.regexp.breakpoints; - memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); + memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE); + memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1); size_t index = 4; // First we'll add the terminator. @@ -175,7 +308,7 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato /** * Push on a new string lex mode. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) { pm_lex_mode_t lex_mode = { .mode = PM_LEX_STRING, @@ -191,7 +324,8 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed // These are the places where we need to split up the content of the // string. We'll use strpbrk to find the first of these characters. uint8_t *breakpoints = lex_mode.as.string.breakpoints; - memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints)); + memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE); + memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1); size_t index = 3; // Now add in the terminator. If the terminator is not already a NULL byte, @@ -221,7 +355,7 @@ lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed * called when we're at the end of the file. We want the parser to be able to * perform its normal error tolerance. */ -static inline bool +static PRISM_INLINE bool lex_mode_push_string_eof(pm_parser_t *parser) { return lex_mode_push_string(parser, false, false, '\0', '\0'); } @@ -241,7 +375,7 @@ lex_mode_pop(pm_parser_t *parser) { } else { parser->lex_modes.index--; pm_lex_mode_t *prev = parser->lex_modes.current->prev; - xfree(parser->lex_modes.current); + xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t)); parser->lex_modes.current = prev; } } @@ -249,7 +383,7 @@ lex_mode_pop(pm_parser_t *parser) { /** * This is the equivalent of IS_lex_state is CRuby. */ -static inline bool +static PRISM_INLINE bool lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) { return parser->lex_state & state; } @@ -260,7 +394,7 @@ typedef enum { PM_IGNORED_NEWLINE_PATTERN } pm_ignored_newline_type_t; -static inline pm_ignored_newline_type_t +static PRISM_INLINE pm_ignored_newline_type_t lex_state_ignored_p(pm_parser_t *parser) { bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED); @@ -273,17 +407,17 @@ lex_state_ignored_p(pm_parser_t *parser) { } } -static inline bool +static PRISM_INLINE bool lex_state_beg_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)); } -static inline bool +static PRISM_INLINE bool lex_state_arg_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_ARG_ANY); } -static inline bool +static PRISM_INLINE bool lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) { if (parser->current.end >= parser->end) { return false; @@ -291,7 +425,7 @@ lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) { return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end); } -static inline bool +static PRISM_INLINE bool lex_state_end_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_END_ANY); } @@ -299,7 +433,7 @@ lex_state_end_p(pm_parser_t *parser) { /** * This is the equivalent of IS_AFTER_OPERATOR in CRuby. */ -static inline bool +static PRISM_INLINE bool lex_state_operator_p(pm_parser_t *parser) { return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT); } @@ -308,7 +442,7 @@ lex_state_operator_p(pm_parser_t *parser) { * Set the state of the lexer. This is defined as a function to be able to put a * breakpoint in it. */ -static inline void +static PRISM_INLINE void lex_state_set(pm_parser_t *parser, pm_lex_state_t state) { parser->lex_state = state; } @@ -322,7 +456,7 @@ lex_state_set(pm_parser_t *parser, pm_lex_state_t state) { #endif #if PM_DEBUG_LOGGING -PRISM_ATTRIBUTE_UNUSED static void +PRISM_UNUSED static void debug_state(pm_parser_t *parser) { fprintf(stderr, "STATE: "); bool first = true; @@ -403,140 +537,134 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call /** * Append an error to the list of errors on the parser. */ -static inline void -pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->error_list, start, end, diag_id); +static PRISM_INLINE void +pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id); } /** - * Append an error to the list of errors on the parser using a format string. + * Append an error to the list of errors on the parser using the location of the + * given token. */ -#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \ - pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__) +static PRISM_INLINE void +pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { + pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); +} /** * Append an error to the list of errors on the parser using the location of the * current token. */ -static inline void +static PRISM_INLINE void pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, parser->current.start, parser->current.end, diag_id); + pm_parser_err_token(parser, &parser->current, diag_id); } /** - * Append an error to the list of errors on the parser using the given location - * using a format string. + * Append an error to the list of errors on the parser using the location of the + * previous token. */ -#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__) +static PRISM_INLINE void +pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { + pm_parser_err_token(parser, &parser->previous, diag_id); +} /** * Append an error to the list of errors on the parser using the location of the * given node. */ -static inline void +static PRISM_INLINE void pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, node->location.start, node->location.end, diag_id); + pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } /** - * Append an error to the list of errors on the parser using the location of the - * given node and a format string. - */ -#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) - -/** - * Append an error to the list of errors on the parser using the location of the - * given node and a format string, and add on the content of the node. + * Append an error to the list of errors on the parser using a format string. */ -#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \ - PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start) +#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \ + pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the - * previous token. + * given node and a format string. */ -static inline void -pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id); -} +#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \ + PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the - * given token. + * given node and a format string, and add on the content of the node. */ -static inline void -pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { - pm_parser_err(parser, token->start, token->end, diag_id); -} +#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \ + PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) (parser_->start + PM_NODE_START(node_))) /** * Append an error to the list of errors on the parser using the location of the * given token and a format string. */ -#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \ - PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) +#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \ + PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__) /** * Append an error to the list of errors on the parser using the location of the * given token and a format string, and add on the content of the token. */ -#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ - PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) +#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \ + PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start) /** * Append a warning to the list of warnings on the parser. */ -static inline void -pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id); +static PRISM_INLINE void +pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) { + pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id); } /** * Append a warning to the list of warnings on the parser using the location of * the given token. */ -static inline void +static PRISM_INLINE void pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) { - pm_parser_warn(parser, token->start, token->end, diag_id); + pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id); } /** * Append a warning to the list of warnings on the parser using the location of * the given node. */ -static inline void +static PRISM_INLINE void pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) { - pm_parser_warn(parser, node->location.start, node->location.end, diag_id); + pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id); } /** - * Append a warning to the list of warnings on the parser using a format string. + * Append a warning to the list of warnings on the parser using a format string + * and the given location. */ -#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \ - pm_diagnostic_list_append_format(&parser->warning_list, start, end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \ + pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of * the given token and a format string. */ -#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \ - PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \ + PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__) /** * Append a warning to the list of warnings on the parser using the location of * the given token and a format string, and add on the content of the token. */ -#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \ - PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start) +#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \ + PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start) /** * Append a warning to the list of warnings on the parser using the location of * the given node and a format string. */ -#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \ - PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__) +#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \ + PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__) /** * Add an error for an expected heredoc terminator. This is a special function @@ -547,8 +675,8 @@ static void pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) { PM_PARSER_ERR_FORMAT( parser, - ident_start, - ident_start + ident_length, + U32(ident_start - parser->start), + U32(ident_length), PM_ERR_HEREDOC_TERM, (int) ident_length, (const char *) ident_start @@ -708,7 +836,7 @@ pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t /** * Get the current state of constant shareability. */ -static inline pm_shareable_constant_value_t +static PRISM_INLINE pm_shareable_constant_value_t pm_parser_scope_shareable_constant_get(pm_parser_t *parser) { return parser->current_scope->shareable_constant; } @@ -733,12 +861,12 @@ pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constan /** * The point at which the set of locals switches from being a list to a hash. */ -#define PM_LOCALS_HASH_THRESHOLD 9 +#define PM_LOCALS_HASH_THRESHOLD 5 static void pm_locals_free(pm_locals_t *locals) { if (locals->capacity > 0) { - xfree(locals->locals); + xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t)); } } @@ -810,11 +938,13 @@ pm_locals_resize(pm_locals_t *locals) { * @return True if the local was added, and false if the local already exists. */ static bool -pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) { +pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) { if (locals->size >= (locals->capacity / 4 * 3)) { pm_locals_resize(locals); } + locals->bloom |= (1u << (name & 31)); + if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) { for (uint32_t index = 0; index < locals->capacity; index++) { pm_local_t *local = &locals->locals[index]; @@ -822,7 +952,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start if (local->name == PM_CONSTANT_ID_UNSET) { *local = (pm_local_t) { .name = name, - .location = { .start = start, .end = end }, + .location = { .start = start, .length = length }, .index = locals->size++, .reads = reads, .hash = 0 @@ -843,7 +973,7 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start if (local->name == PM_CONSTANT_ID_UNSET) { *local = (pm_local_t) { .name = name, - .location = { .start = start, .end = end }, + .location = { .start = start, .length = length }, .index = locals->size++, .reads = reads, .hash = initial_hash @@ -867,6 +997,8 @@ pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start */ static uint32_t pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) { + if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX; + if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) { for (uint32_t index = 0; index < locals->size; index++) { pm_local_t *local = &locals->locals[index]; @@ -943,8 +1075,8 @@ pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) { * written but not read in certain contexts. */ static void -pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) { - pm_constant_id_list_init_capacity(list, locals->size); +pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) { + pm_constant_id_list_init_capacity(parser->arena, list, locals->size); // If we're still below the threshold for switching to a hash, then we only // need to loop over the locals until we hit the size because the locals are @@ -961,14 +1093,14 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, if (local->name != PM_CONSTANT_ID_UNSET) { pm_constant_id_list_insert(list, (size_t) local->index, local->name); - if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) { + if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) { pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name); if (constant->length >= 1 && *constant->start != '_') { PM_PARSER_WARN_FORMAT( parser, local->location.start, - local->location.end, + local->location.length, PM_WARN_UNUSED_LOCAL_VARIABLE, (int) constant->length, (const char *) constant->start @@ -986,43 +1118,53 @@ pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, /** * Retrieve the constant pool id for the given location. */ -static inline pm_constant_id_t -pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start)); +static PRISM_INLINE pm_constant_id_t +pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { + /* Fast path: if this is the same token as the last lookup (same pointer + * range), return the cached result. */ + if (start == parser->constant_cache.start && end == parser->constant_cache.end) { + return parser->constant_cache.id; + } + + pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start)); + + parser->constant_cache.start = start; + parser->constant_cache.end = end; + parser->constant_cache.id = id; + + return id; } /** * Retrieve the constant pool id for the given string. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) { - return pm_constant_pool_insert_owned(&parser->constant_pool, start, length); + return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length); } /** * Retrieve the constant pool id for the given static literal C string. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) { - return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length); + return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length); } /** * Retrieve the constant pool id for the given token. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { - return pm_parser_constant_id_location(parser, token->start, token->end); + return pm_parser_constant_id_raw(parser, token->start, token->end); } /** - * Retrieve the constant pool id for the given token. If the token is not - * provided, then return 0. + * This macro allows you to define a case statement for all of the nodes that + * may result in a void value. */ -static inline pm_constant_id_t -pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) { - return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token); -} +#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \ + case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE /** * Check whether or not the given node is value expression. @@ -1035,12 +1177,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) { while (node != NULL) { switch (PM_NODE_TYPE(node)) { - case PM_RETURN_NODE: - case PM_BREAK_NODE: - case PM_NEXT_NODE: - case PM_REDO_NODE: - case PM_RETRY_NODE: - case PM_MATCH_REQUIRED_NODE: + case PM_CASE_VOID_VALUE: return void_node != NULL ? void_node : node; case PM_MATCH_PREDICATE_NODE: return NULL; @@ -1049,57 +1186,128 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) { if (cast->ensure_clause != NULL) { if (cast->rescue_clause != NULL) { - pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause); + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause)); if (vn != NULL) return vn; } if (cast->statements != NULL) { - pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements); + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements)); if (vn != NULL) return vn; } - node = (pm_node_t *) cast->ensure_clause; + node = UP(cast->ensure_clause); } else if (cast->rescue_clause != NULL) { - if (cast->statements == NULL) return NULL; + // https://bugs.ruby-lang.org/issues/21669 + if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) { + if (cast->statements == NULL) return NULL; - pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements); - if (vn == NULL) return NULL; - if (void_node == NULL) void_node = vn; + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements)); + if (vn == NULL) return NULL; + if (void_node == NULL) void_node = vn; + } for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) { - pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements); + pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements)); + if (vn == NULL) { + // https://bugs.ruby-lang.org/issues/21669 + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) { + return NULL; + } void_node = NULL; break; } - if (void_node == NULL) { - void_node = vn; - } } if (cast->else_clause != NULL) { - node = (pm_node_t *) cast->else_clause; + node = UP(cast->else_clause); + + // https://bugs.ruby-lang.org/issues/21669 + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) { + pm_node_t *vn = pm_check_value_expression(parser, node); + if (vn != NULL) return vn; + } } else { return void_node; } } else { - node = (pm_node_t *) cast->statements; + node = UP(cast->statements); } break; } + case PM_CASE_NODE: { + // https://bugs.ruby-lang.org/issues/21669 + if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) { + return NULL; + } + + pm_case_node_t *cast = (pm_case_node_t *) node; + if (cast->else_clause == NULL) return NULL; + + pm_node_t *condition; + PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) { + assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE)); + + pm_when_node_t *cast = (pm_when_node_t *) condition; + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements)); + if (vn == NULL) return NULL; + if (void_node == NULL) void_node = vn; + } + + node = UP(cast->else_clause); + break; + } + case PM_CASE_MATCH_NODE: { + // https://bugs.ruby-lang.org/issues/21669 + if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) { + return NULL; + } + + pm_case_match_node_t *cast = (pm_case_match_node_t *) node; + if (cast->else_clause == NULL) return NULL; + + pm_node_t *condition; + PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) { + assert(PM_NODE_TYPE_P(condition, PM_IN_NODE)); + + pm_in_node_t *cast = (pm_in_node_t *) condition; + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements)); + if (vn == NULL) return NULL; + if (void_node == NULL) void_node = vn; + } + + node = UP(cast->else_clause); + break; + } case PM_ENSURE_NODE: { pm_ensure_node_t *cast = (pm_ensure_node_t *) node; - node = (pm_node_t *) cast->statements; + node = UP(cast->statements); break; } case PM_PARENTHESES_NODE: { pm_parentheses_node_t *cast = (pm_parentheses_node_t *) node; - node = (pm_node_t *) cast->body; + node = UP(cast->body); break; } case PM_STATEMENTS_NODE: { pm_statements_node_t *cast = (pm_statements_node_t *) node; + + // https://bugs.ruby-lang.org/issues/21669 + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) { + pm_node_t *body_part; + PM_NODE_LIST_FOREACH(&cast->body, index, body_part) { + switch (PM_NODE_TYPE(body_part)) { + case PM_CASE_VOID_VALUE: + if (void_node == NULL) { + void_node = body_part; + } + return void_node; + default: break; + } + } + } + node = cast->body.nodes[cast->body.size - 1]; break; } @@ -1108,7 +1316,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) { if (cast->statements == NULL || cast->subsequent == NULL) { return NULL; } - pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements); + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements)); if (vn == NULL) { return NULL; } @@ -1123,19 +1331,19 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) { if (cast->statements == NULL || cast->else_clause == NULL) { return NULL; } - pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements); + pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements)); if (vn == NULL) { return NULL; } if (void_node == NULL) { void_node = vn; } - node = (pm_node_t *) cast->else_clause; + node = UP(cast->else_clause); break; } case PM_ELSE_NODE: { pm_else_node_t *cast = (pm_else_node_t *) node; - node = (pm_node_t *) cast->statements; + node = UP(cast->statements); break; } case PM_AND_NODE: { @@ -1165,7 +1373,7 @@ pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) { return NULL; } -static inline void +static PRISM_INLINE void pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) { pm_node_t *void_node = pm_check_value_expression(parser, node); if (void_node != NULL) { @@ -1193,7 +1401,7 @@ pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) { break; case PM_CALL_NODE: { const pm_call_node_t *cast = (const pm_call_node_t *) node; - if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break; + if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break; const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name); switch (message->length) { @@ -1406,10 +1614,10 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) { * Add a warning to the parser if the value that is being written inside of a * predicate to a conditional is a literal. */ -static inline void +static PRISM_INLINE void pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) { if (pm_conditional_predicate_warn_write_literal_p(node)) { - pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL); + pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL); } } @@ -1547,26 +1755,6 @@ pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_pr } /** - * In a lot of places in the tree you can have tokens that are not provided but - * that do not cause an error. For example, this happens in a method call - * without parentheses. In these cases we set the token to the "not provided" type. - * For example: - * - * pm_token_t token = not_provided(parser); - */ -static inline pm_token_t -not_provided(pm_parser_t *parser) { - return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }; -} - -#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start }) -#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end }) -#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end }) -#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end }) -#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL }) -#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token)) - -/** * This is a special out parameter to the parse_arguments_list function that * includes opening and closing parentheses in addition to the arguments since * it's so common. It is handy to use when passing argument information to one @@ -1592,22 +1780,29 @@ typedef struct { /** * Retrieve the end location of a `pm_arguments_t` object. */ -static inline const uint8_t * +static PRISM_INLINE const pm_location_t * pm_arguments_end(pm_arguments_t *arguments) { if (arguments->block != NULL) { - const uint8_t *end = arguments->block->location.end; - if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) { - end = arguments->closing_loc.end; + uint32_t end = PM_NODE_END(arguments->block); + + if (arguments->closing_loc.length > 0) { + uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc); + if (arguments_end > end) { + return &arguments->closing_loc; + } } - return end; + return &arguments->block->location; } - if (arguments->closing_loc.start != NULL) { - return arguments->closing_loc.end; + if (arguments->closing_loc.length > 0) { + return &arguments->closing_loc; } if (arguments->arguments != NULL) { - return arguments->arguments->base.location.end; + return &arguments->arguments->base.location; + } + if (arguments->opening_loc.length > 0) { + return &arguments->opening_loc; } - return arguments->closing_loc.end; + return NULL; } /** @@ -1618,7 +1813,7 @@ static void pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) { // First, check that we have arguments and that we don't have a closing // location for them. - if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) { + if (arguments->arguments == NULL || arguments->closing_loc.length > 0) { return; } @@ -1635,7 +1830,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b // If we didn't hit a case before this check, then at this point we need to // add a syntax error. - pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK); + pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK); } /******************************************************************************/ @@ -1648,7 +1843,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b * reason we have the encoding_changed boolean to check if we need to go through * the function pointer or can just directly use the UTF-8 functions. */ -static inline size_t +static PRISM_INLINE size_t char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) { if (n <= 0) return 0; @@ -1675,7 +1870,7 @@ char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t * Similar to char_is_identifier but this function assumes that the encoding * has not been changed. */ -static inline size_t +static PRISM_INLINE size_t char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) { if (n <= 0) { return 0; @@ -1687,11 +1882,189 @@ char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) { } /** + * Scan forward through ASCII identifier characters (a-z, A-Z, 0-9, _) using + * wide operations. Returns the number of leading ASCII identifier bytes. + * Callers must handle any remaining bytes (short tail or non-ASCII/UTF-8) + * with a byte-at-a-time loop. + * + * Up to three optimized implementations are selected at compile time, with a + * no-op fallback for unsupported platforms: + * 1. NEON — processes 16 bytes per iteration on aarch64. + * 2. SSSE3 — processes 16 bytes per iteration on x86-64. + * 3. SWAR — little-endian fallback, processes 8 bytes per iteration. + */ + +#if defined(PRISM_HAS_NEON) +#include <arm_neon.h> + +static PRISM_INLINE size_t +scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { + const uint8_t *cursor = start; + + // Nibble-based lookup tables for classifying [a-zA-Z0-9_]. + // Each high nibble is assigned a unique bit; the low nibble table + // contains the OR of bits for all high nibbles that have an + // identifier character at that low nibble position. A byte is an + // identifier character iff (low_lut[lo] & high_lut[hi]) != 0. + static const uint8_t low_lut_data[16] = { + 0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, + 0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E + }; + static const uint8_t high_lut_data[16] = { + 0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + const uint8x16_t low_lut = vld1q_u8(low_lut_data); + const uint8x16_t high_lut = vld1q_u8(high_lut_data); + const uint8x16_t mask_0f = vdupq_n_u8(0x0F); + + while (cursor + 16 <= end) { + uint8x16_t v = vld1q_u8(cursor); + + uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f)); + uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4)); + uint8x16_t ident = vandq_u8(lo_class, hi_class); + + // Fast check: if the per-byte minimum is nonzero, every byte matched. + if (vminvq_u8(ident) != 0) { + cursor += 16; + continue; + } + + // Find the first non-identifier byte (zero in ident). + uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0)); + uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0); + + if (lo != 0) { + cursor += pm_ctzll(lo) / 8; + } else { + uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1); + cursor += 8 + pm_ctzll(hi) / 8; + } + + return (size_t) (cursor - start); + } + + return (size_t) (cursor - start); +} + +#elif defined(PRISM_HAS_SSSE3) +#include <tmmintrin.h> + +static PRISM_INLINE size_t +scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { + const uint8_t *cursor = start; + + while (cursor + 16 <= end) { + __m128i v = _mm_loadu_si128((const __m128i *) cursor); + __m128i zero = _mm_setzero_si128(); + + // Unsigned range check via saturating subtraction: + // byte >= lo ⟺ saturate(lo - byte) == 0 + // byte <= hi ⟺ saturate(byte - hi) == 0 + + // Fold case: OR with 0x20 maps A-Z to a-z. + __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20)); + __m128i letter = _mm_and_si128( + _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero), + _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero)); + + __m128i digit = _mm_and_si128( + _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero), + _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero)); + + __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F)); + + __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore); + int mask = _mm_movemask_epi8(ident); + + if (mask == 0xFFFF) { + cursor += 16; + continue; + } + + cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF)); + return (size_t) (cursor - start); + } + + return (size_t) (cursor - start); +} + +// The SWAR path uses pm_ctzll to find the first non-matching byte within a +// word, which only yields the correct byte index on little-endian targets. +// We gate on a positive little-endian check so that unknown-endianness +// platforms safely fall through to the no-op fallback. +#elif defined(PRISM_HAS_SWAR) + +/** + * Portable SWAR fallback — processes 8 bytes per iteration. + * + * The byte-wise range checks avoid cross-byte borrows by pre-setting the high + * bit of each byte before subtraction: (byte | 0x80) - lo has a minimum value + * of 0x80 - 0x7F = 1, so underflow (and thus a borrow into the next byte) is + * impossible. The result has bit 7 set if and only if byte >= lo. The same + * reasoning applies to the upper-bound direction. + */ +static PRISM_INLINE size_t +scan_identifier_ascii(const uint8_t *start, const uint8_t *end) { + static const uint64_t ones = 0x0101010101010101ULL; + static const uint64_t highs = 0x8080808080808080ULL; + const uint8_t *cursor = start; + + while (cursor + 8 <= end) { + uint64_t word; + memcpy(&word, cursor, 8); + + // Bail on any non-ASCII byte. + if (word & highs) break; + + uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs; + + // Fold upper- and lowercase together by forcing bit 5 (OR 0x20), + // then check the lowercase range once. A-Z maps to a-z; the + // only non-letter byte that could alias into [0x61,0x7A] is one + // whose original value was in [0x41,0x5A] — which is exactly + // the uppercase letters we want to match. + uint64_t lowered = word | (ones * 0x20); + uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs; + + // Standard SWAR "has zero byte" idiom on (word XOR 0x5F) to find + // bytes equal to underscore. Safe from cross-byte borrows because + // the ASCII guard above ensures all bytes are < 0x80. + uint64_t xor_us = word ^ (ones * 0x5F); + uint64_t underscore = (xor_us - ones) & ~xor_us & highs; + + uint64_t ident = digit | letter | underscore; + + if (ident == highs) { + cursor += 8; + continue; + } + + // Find the first non-identifier byte. On little-endian the first + // byte sits in the least-significant position. + uint64_t not_ident = ~ident & highs; + cursor += pm_ctzll(not_ident) / 8; + return (size_t) (cursor - start); + } + + return (size_t) (cursor - start); +} + +#else + +// No-op fallback for big-endian or other unsupported platforms. +// The caller's byte-at-a-time loop handles everything. +#define scan_identifier_ascii(start, end) ((size_t) 0) + +#endif + +/** * Like the above, this function is also used extremely frequently to lex all of * the identifiers in a source file once the first character has been found. So * it's important that it be as fast as possible. */ -static inline size_t +static PRISM_INLINE size_t char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) { if (n <= 0) { return 0; @@ -1729,7 +2102,7 @@ const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { #undef BIT #undef PUNCT -static inline bool +static PRISM_INLINE bool char_is_global_name_punctuation(const uint8_t b) { const unsigned int i = (const unsigned int) b; if (i <= 0x20 || 0x7e < i) return false; @@ -1737,7 +2110,7 @@ char_is_global_name_punctuation(const uint8_t b) { return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1; } -static inline bool +static PRISM_INLINE bool token_is_setter_name(pm_token_t *token) { return ( (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) || @@ -1825,7 +2198,7 @@ pm_local_is_keyword(const char *source, size_t length) { /** * Set the given flag on the given node. */ -static inline void +static PRISM_INLINE void pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) { node->flags |= flag; } @@ -1833,7 +2206,7 @@ pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) { /** * Remove the given flag from the given node. */ -static inline void +static PRISM_INLINE void pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) { node->flags &= (pm_node_flags_t) ~flag; } @@ -1841,7 +2214,7 @@ pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) { /** * Set the repeated parameter flag on the given node. */ -static inline void +static PRISM_INLINE void pm_node_flag_set_repeated_parameter(pm_node_t *node) { assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE || PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE || @@ -1869,7 +2242,7 @@ pm_node_flag_set_repeated_parameter(pm_node_t *node) { /** * Parse out the options for a regular expression. */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) { pm_node_flags_t flags = 0; @@ -1895,9 +2268,9 @@ pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closin size_t unknown_flags_length = pm_buffer_length(&unknown_flags); if (unknown_flags_length != 0) { const char *word = unknown_flags_length >= 2 ? "options" : "option"; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags)); } - pm_buffer_free(&unknown_flags); + pm_buffer_cleanup(&unknown_flags); } return flags; @@ -1915,36 +2288,45 @@ static size_t pm_statements_node_body_length(pm_statements_node_t *node); /** - * This function is here to allow us a place to extend in the future when we - * implement our own arena allocation. + * Move an integer's values array into the arena. If the integer has heap- + * allocated values, copy them to the arena and free the original. */ -static inline void * -pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) { - void *memory = xcalloc(1, size); - if (memory == NULL) { - fprintf(stderr, "Failed to allocate %d bytes\n", (int) size); - abort(); +static PRISM_INLINE void +pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) { + if (integer->values != NULL) { + size_t byte_size = integer->length * sizeof(uint32_t); + uint32_t *old_values = integer->values; + integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t)); + xfree(old_values); } - return memory; } -#define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type)) -#define PM_NODE_IDENTIFY(parser) (++parser->node_id) - /** - * Allocate a new MissingNode node. + * Allocate a new ErrorRecoveryNode node with no unexpected child. */ -static pm_missing_node_t * -pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t); - - *node = (pm_missing_node_t) {{ - .type = PM_MISSING_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { .start = start, .end = end } - }}; +static pm_error_recovery_node_t * +pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) { + return pm_error_recovery_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = start, .length = length }), + NULL + ); +} - return node; +/** + * Allocate a new ErrorRecoveryNode node wrapping an unexpected child node. + */ +static pm_error_recovery_node_t * +pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) { + return pm_error_recovery_node_new( + parser->arena, + ++parser->node_id, + 0, + unexpected->location, + unexpected + ); } /** @@ -1953,23 +2335,16 @@ pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t static pm_alias_global_variable_node_t * pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) { assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS); - pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t); - - *node = (pm_alias_global_variable_node_t) { - { - .type = PM_ALIAS_GLOBAL_VARIABLE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = old_name->location.end - }, - }, - .new_name = new_name, - .old_name = old_name, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) - }; - return node; + return pm_alias_global_variable_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name), + new_name, + old_name, + TOK2LOC(parser, keyword) + ); } /** @@ -1978,23 +2353,16 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw static pm_alias_method_node_t * pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) { assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS); - pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t); - *node = (pm_alias_method_node_t) { - { - .type = PM_ALIAS_METHOD_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = old_name->location.end - }, - }, - .new_name = new_name, - .old_name = old_name, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) - }; - - return node; + return pm_alias_method_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name), + new_name, + old_name, + TOK2LOC(parser, keyword) + ); } /** @@ -2002,23 +2370,15 @@ pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_n */ static pm_alternation_pattern_node_t * pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) { - pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t); - - *node = (pm_alternation_pattern_node_t) { - { - .type = PM_ALTERNATION_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = left->location.start, - .end = right->location.end - }, - }, - .left = left, - .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_alternation_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(left, right), + left, + right, + TOK2LOC(parser, operator) + ); } /** @@ -2028,23 +2388,15 @@ static pm_and_node_t * pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) { pm_assert_value_expression(parser, left); - pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t); - - *node = (pm_and_node_t) { - { - .type = PM_AND_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = left->location.start, - .end = right->location.end - }, - }, - .left = left, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .right = right - }; - - return node; + return pm_and_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(left, right), + left, + right, + TOK2LOC(parser, operator) + ); } /** @@ -2052,18 +2404,13 @@ pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *opera */ static pm_arguments_node_t * pm_arguments_node_create(pm_parser_t *parser) { - pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t); - - *node = (pm_arguments_node_t) { - { - .type = PM_ARGUMENTS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser) - }, - .arguments = { 0 } - }; - - return node; + return pm_arguments_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + ((pm_node_list_t) { 0 }) + ); } /** @@ -2078,19 +2425,22 @@ pm_arguments_node_size(pm_arguments_node_t *node) { * Append an argument to an arguments node. */ static void -pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) { +pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) { if (pm_arguments_node_size(node) == 0) { - node->base.location.start = argument->location.start; + PM_NODE_START_SET_NODE(node, argument); } - node->base.location.end = argument->location.end; - pm_node_list_append(&node->arguments, argument); + if (PM_NODE_END(node) < PM_NODE_END(argument)) { + PM_NODE_LENGTH_SET_NODE(node, argument); + } + + pm_node_list_append(arena, &node->arguments, argument); if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) { if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) { - pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS); + pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS); } else { - pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT); + pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT); } } } @@ -2100,43 +2450,49 @@ pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argumen */ static pm_array_node_t * pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) { - pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t); - - *node = (pm_array_node_t) { - { - .type = PM_ARRAY_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(opening) - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .elements = { 0 } - }; - - return node; + if (opening == NULL) { + return pm_array_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_UNSET, + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }) + ); + } else { + return pm_array_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, opening), + ((pm_node_list_t) { 0 }), + TOK2LOC(parser, opening), + TOK2LOC(parser, opening) + ); + } } /** * Append an argument to an array node. */ -static inline void -pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) { - if (!node->elements.size && !node->opening_loc.start) { - node->base.location.start = element->location.start; +static PRISM_INLINE void +pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) { + if (!node->elements.size && !node->opening_loc.length) { + PM_NODE_START_SET_NODE(node, element); } - pm_node_list_append(&node->elements, element); - node->base.location.end = element->location.end; + pm_node_list_append(arena, &node->elements, element); + PM_NODE_LENGTH_SET_NODE(node, element); // If the element is not a static literal, then the array is not a static // literal. Turn that flag off. if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) { - pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL); + pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL); } if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) { - pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT); + pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT); } } @@ -2144,10 +2500,10 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) { * Set the closing token and end location of an array node. */ static void -pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) { - assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED); - node->base.location.end = closing->end; - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); +pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) { + assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + node->closing_loc = TOK2LOC(parser, closing); } /** @@ -2156,24 +2512,18 @@ pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) { */ static pm_array_pattern_node_t * pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) { - pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); - - *node = (pm_array_pattern_node_t) { - { - .type = PM_ARRAY_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = nodes->nodes[0]->location.start, - .end = nodes->nodes[nodes->size - 1]->location.end - }, - }, - .constant = NULL, - .rest = NULL, - .requireds = { 0 }, - .posts = { 0 }, - .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; + pm_array_pattern_node_t *node = pm_array_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]), + NULL, + ((pm_node_list_t) { 0 }), + NULL, + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }) + ); // For now we're going to just copy over each pointer manually. This could be // much more efficient, as we could instead resize the node list. @@ -2185,9 +2535,9 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node node->rest = child; found_rest = true; } else if (found_rest) { - pm_node_list_append(&node->posts, child); + pm_node_list_append(parser->arena, &node->posts, child); } else { - pm_node_list_append(&node->requireds, child); + pm_node_list_append(parser->arena, &node->requireds, child); } } @@ -2199,23 +2549,18 @@ pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *node */ static pm_array_pattern_node_t * pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) { - pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); - - *node = (pm_array_pattern_node_t) { - { - .type = PM_ARRAY_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = rest->location, - }, - .constant = NULL, - .rest = rest, - .requireds = { 0 }, - .posts = { 0 }, - .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; + return pm_array_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODE(rest), + NULL, + ((pm_node_list_t) { 0 }), + rest, + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }) + ); } /** @@ -2224,26 +2569,18 @@ pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) { */ static pm_array_pattern_node_t * pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) { - pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); - - *node = (pm_array_pattern_node_t) { - { - .type = PM_ARRAY_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = constant->location.start, - .end = closing->end - }, - }, - .constant = constant, - .rest = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), - .requireds = { 0 }, - .posts = { 0 } - }; - - return node; + return pm_array_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing), + constant, + ((pm_node_list_t) { 0 }), + NULL, + ((pm_node_list_t) { 0 }), + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } /** @@ -2252,31 +2589,23 @@ pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, */ static pm_array_pattern_node_t * pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) { - pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t); - - *node = (pm_array_pattern_node_t) { - { - .type = PM_ARRAY_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - }, - }, - .constant = NULL, - .rest = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), - .requireds = { 0 }, - .posts = { 0 } - }; - - return node; + return pm_array_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + NULL, + ((pm_node_list_t) { 0 }), + NULL, + ((pm_node_list_t) { 0 }), + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } -static inline void -pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) { - pm_node_list_append(&node->requireds, inner); +static PRISM_INLINE void +pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) { + pm_node_list_append(arena, &node->requireds, inner); } /** @@ -2284,15 +2613,14 @@ pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t */ static pm_assoc_node_t * pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) { - pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t); - const uint8_t *end; + uint32_t end; - if (value != NULL && value->location.end > key->location.end) { - end = value->location.end; - } else if (operator->type != PM_TOKEN_NOT_PROVIDED) { - end = operator->end; + if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) { + end = PM_NODE_END(value); + } else if (operator != NULL) { + end = PM_TOKEN_END(parser, operator); } else { - end = key->location.end; + end = PM_NODE_END(key); } // Hash string keys will be frozen, so we can mark them as frozen here so @@ -2312,22 +2640,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL; } - *node = (pm_assoc_node_t) { - { - .type = PM_ASSOC_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = key->location.start, - .end = end - }, - }, - .key = key, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_assoc_node_new( + parser->arena, + ++parser->node_id, + flags, + ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }), + key, + value, + NTOK2LOC(parser, operator) + ); } /** @@ -2336,22 +2657,15 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper static pm_assoc_splat_node_t * pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) { assert(operator->type == PM_TOKEN_USTAR_STAR); - pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t); - - *node = (pm_assoc_splat_node_t) { - { - .type = PM_ASSOC_SPLAT_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = value == NULL ? operator->end : value->location.end - }, - }, - .value = value, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - return node; + return pm_assoc_splat_node_new( + parser->arena, + ++parser->node_id, + 0, + (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value), + value, + TOK2LOC(parser, operator) + ); } /** @@ -2360,18 +2674,14 @@ pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token static pm_back_reference_read_node_t * pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) { assert(name->type == PM_TOKEN_BACK_REFERENCE); - pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t); - *node = (pm_back_reference_read_node_t) { - { - .type = PM_BACK_REFERENCE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name), - }, - .name = pm_parser_constant_id_token(parser, name) - }; - - return node; + return pm_back_reference_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + pm_parser_constant_id_token(parser, name) + ); } /** @@ -2379,23 +2689,21 @@ pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) */ static pm_begin_node_t * pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) { - pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t); - - *node = (pm_begin_node_t) { - { - .type = PM_BEGIN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = begin_keyword->start, - .end = statements == NULL ? begin_keyword->end : statements->base.location.end - }, - }, - .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword), - .statements = statements, - .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; + uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword); + uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements); + + return pm_begin_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + NTOK2LOC(parser, begin_keyword), + statements, + NULL, + NULL, + NULL, + ((pm_location_t) { 0 }) + ); } /** @@ -2403,11 +2711,10 @@ pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_st */ static void pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) { - // If the begin keyword doesn't exist, we set the start on the begin_node - if (!node->begin_keyword_loc.start) { - node->base.location.start = rescue_clause->base.location.start; + if (node->begin_keyword_loc.length == 0) { + PM_NODE_START_SET_NODE(node, rescue_clause); } - node->base.location.end = rescue_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, rescue_clause); node->rescue_clause = rescue_clause; } @@ -2416,7 +2723,10 @@ pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_ */ static void pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) { - node->base.location.end = else_clause->base.location.end; + if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) { + PM_NODE_START_SET_NODE(node, else_clause); + } + PM_NODE_LENGTH_SET_NODE(node, else_clause); node->else_clause = else_clause; } @@ -2425,7 +2735,10 @@ pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause */ static void pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) { - node->base.location.end = ensure_clause->base.location.end; + if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) { + PM_NODE_START_SET_NODE(node, ensure_clause); + } + PM_NODE_LENGTH_SET_NODE(node, ensure_clause); node->ensure_clause = ensure_clause; } @@ -2433,11 +2746,10 @@ pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_ * Set the end keyword and end location of a begin node. */ static void -pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) { - assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING); - - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword); +pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) { + assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** @@ -2445,22 +2757,16 @@ pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keywo */ static pm_block_argument_node_t * pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) { - pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t); - - *node = (pm_block_argument_node_t) { - { - .type = PM_BLOCK_ARGUMENT_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = expression == NULL ? operator->end : expression->location.end - }, - }, - .expression = expression, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + assert(operator->type == PM_TOKEN_UAMPERSAND); + + return pm_block_argument_node_new( + parser->arena, + ++parser->node_id, + 0, + (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression), + expression, + TOK2LOC(parser, operator) + ); } /** @@ -2468,22 +2774,17 @@ pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, p */ static pm_block_node_t * pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) { - pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t); - - *node = (pm_block_node_t) { - { - .type = PM_BLOCK_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { .start = opening->start, .end = closing->end }, - }, - .locals = *locals, - .parameters = parameters, - .body = body, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) - }; - - return node; + return pm_block_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + *locals, + parameters, + body, + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } /** @@ -2491,24 +2792,17 @@ pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p */ static pm_block_parameter_node_t * pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) { - assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND); - pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t); - - *node = (pm_block_parameter_node_t) { - { - .type = PM_BLOCK_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end) - }, - }, - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND); + + return pm_block_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name), + name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + NTOK2LOC(parser, name), + TOK2LOC(parser, operator) + ); } /** @@ -2516,53 +2810,44 @@ pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, cons */ static pm_block_parameters_node_t * pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) { - pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t); - - const uint8_t *start; - if (opening->type != PM_TOKEN_NOT_PROVIDED) { - start = opening->start; + uint32_t start; + if (opening != NULL) { + start = PM_TOKEN_START(parser, opening); } else if (parameters != NULL) { - start = parameters->base.location.start; + start = PM_NODE_START(parameters); } else { - start = NULL; + start = 0; } - const uint8_t *end; + uint32_t end; if (parameters != NULL) { - end = parameters->base.location.end; - } else if (opening->type != PM_TOKEN_NOT_PROVIDED) { - end = opening->end; + end = PM_NODE_END(parameters); + } else if (opening != NULL) { + end = PM_TOKEN_END(parser, opening); } else { - end = NULL; - } - - *node = (pm_block_parameters_node_t) { - { - .type = PM_BLOCK_PARAMETERS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = start, - .end = end - } - }, - .parameters = parameters, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .locals = { 0 } - }; - - return node; + end = 0; + } + + return pm_block_parameters_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + parameters, + ((pm_node_list_t) { 0 }), + NTOK2LOC(parser, opening), + ((pm_location_t) { 0 }) + ); } /** * Set the closing location of a BlockParametersNode node. */ static void -pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) { - assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING); - - node->base.location.end = closing->end; - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); +pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) { + assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + node->closing_loc = TOK2LOC(parser, closing); } /** @@ -2570,29 +2855,27 @@ pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_ */ static pm_block_local_variable_node_t * pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) { - pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t); - - *node = (pm_block_local_variable_node_t) { - { - .type = PM_BLOCK_LOCAL_VARIABLE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name), - }, - .name = pm_parser_constant_id_token(parser, name) - }; - - return node; + return pm_block_local_variable_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + pm_parser_constant_id_token(parser, name) + ); } /** * Append a new block-local variable to a BlockParametersNode node. */ static void -pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) { - pm_node_list_append(&node->locals, (pm_node_t *) local); +pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) { + pm_node_list_append(arena, &node->locals, UP(local)); + + if (PM_NODE_LENGTH(node) == 0) { + PM_NODE_START_SET_NODE(node, local); + } - if (node->base.location.start == NULL) node->base.location.start = local->base.location.start; - node->base.location.end = local->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, local); } /** @@ -2601,66 +2884,55 @@ pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm static pm_break_node_t * pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) { assert(keyword->type == PM_TOKEN_KEYWORD_BREAK); - pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t); - *node = (pm_break_node_t) { - { - .type = PM_BREAK_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = (arguments == NULL ? keyword->end : arguments->base.location.end) - }, - }, - .arguments = arguments, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) - }; - - return node; + return pm_break_node_new( + parser->arena, + ++parser->node_id, + 0, + (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments), + arguments, + TOK2LOC(parser, keyword) + ); } // There are certain flags that we want to use internally but don't want to // expose because they are not relevant beyond parsing. Therefore we'll define // them here and not define them in config.yml/a header file. -static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4; -static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40; -static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80; -static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100; +static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2); + +static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1); +static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2); +static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3); /** - * Allocate and initialize a new CallNode node. This sets everything to NULL or - * PM_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden - * in the various specializations of this function. + * Allocate and initialize a new CallNode node. This sets everything to NULL + * such that its values can be overridden in the various specializations of this + * function. */ static pm_call_node_t * pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) { - pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t); - - *node = (pm_call_node_t) { - { - .type = PM_CALL_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser), - }, - .receiver = NULL, - .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .arguments = NULL, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .block = NULL, - .name = 0 - }; - - return node; + return pm_call_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_UNSET, + NULL, + ((pm_location_t) { 0 }), + 0, + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + NULL, + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + NULL + ); } /** * Returns the value that the ignore visibility flag should be set to for the * given receiver. */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) { return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0; } @@ -2680,12 +2952,15 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_ pm_call_node_t *node = pm_call_node_create(parser, flags); - node->base.location.start = receiver->location.start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); node->receiver = receiver; node->message_loc.start = arguments->opening_loc.start; - node->message_loc.end = arguments->closing_loc.end; + node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start; node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; @@ -2706,20 +2981,22 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags); - node->base.location.start = MIN(receiver->location.start, argument->location.start); - node->base.location.end = MAX(receiver->location.end, argument->location.end); + PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument); + PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument); node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = TOK2LOC(parser, operator); pm_arguments_node_t *arguments = pm_arguments_node_create(parser); - pm_arguments_node_arguments_append(arguments, argument); + pm_arguments_node_arguments_append(parser->arena, arguments, argument); node->arguments = arguments; node->name = pm_parser_constant_id_token(parser, operator); return node; } +static const uint8_t * parse_operator_symbol_name(const pm_token_t *); + /** * Allocate and initialize a new CallNode node from a call expression. */ @@ -2729,26 +3006,31 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = receiver->location.start; - const uint8_t *end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + const pm_location_t *end = pm_arguments_end(arguments); if (end == NULL) { - end = message->end; + PM_NODE_LENGTH_SET_TOKEN(parser, node, message); + } else { + PM_NODE_LENGTH_SET_LOCATION(node, end); } - node->base.location.end = end; node->receiver = receiver; - node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->call_operator_loc = TOK2LOC(parser, operator); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; node->block = arguments->block; if (operator->type == PM_TOKEN_AMPERSAND_DOT) { - pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION); + pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION); } - node->name = pm_parser_constant_id_token(parser, message); + /** + * If the final character is `@` as is the case for `foo.~@`, + * we should ignore the @ in the same way we do for symbols. + */ + node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message)); return node; } @@ -2758,12 +3040,9 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o static pm_call_node_t * pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) { pm_call_node_t *node = pm_call_node_create(parser, 0); - node->base.location.start = parser->start; - node->base.location.end = parser->end; + node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) }; node->receiver = receiver; - node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL }; - node->message_loc = (pm_location_t) { .start = NULL, .end = NULL }; node->arguments = arguments; node->name = pm_parser_constant_id_constant(parser, message, strlen(message)); @@ -2778,10 +3057,12 @@ static pm_call_node_t * pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location.start = message->start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_TOKEN(parser, node, message); + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2799,7 +3080,7 @@ static pm_call_node_t * pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location = PM_LOCATION_NULL_VALUE(parser); + node->base.location = (pm_location_t) { 0 }; node->arguments = arguments; node->name = name; @@ -2816,16 +3097,16 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = message->start; - if (arguments->closing_loc.start != NULL) { - node->base.location.end = arguments->closing_loc.end; + PM_NODE_START_SET_TOKEN(parser, node, message); + if (arguments->closing_loc.length > 0) { + PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc); } else { assert(receiver != NULL); - node->base.location.end = receiver->location.end; + PM_NODE_LENGTH_SET_NODE(node, receiver); } node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->message_loc = TOK2LOC(parser, message); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; @@ -2843,18 +3124,20 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = receiver->location.start; - node->base.location.end = pm_arguments_end(arguments); + PM_NODE_START_SET_NODE(node, receiver); + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + PM_NODE_LENGTH_SET_LOCATION(node, end); node->receiver = receiver; - node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->call_operator_loc = TOK2LOC(parser, operator); node->opening_loc = arguments->opening_loc; node->arguments = arguments->arguments; node->closing_loc = arguments->closing_loc; node->block = arguments->block; if (operator->type == PM_TOKEN_AMPERSAND_DOT) { - pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION); + pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION); } node->name = pm_parser_constant_id_constant(parser, "call", 4); @@ -2870,11 +3153,11 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t * pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver)); - node->base.location.start = operator->start; - node->base.location.end = receiver->location.end; + PM_NODE_START_SET_TOKEN(parser, node, operator); + PM_NODE_LENGTH_SET_NODE(node, receiver); node->receiver = receiver; - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = TOK2LOC(parser, operator); node->name = pm_parser_constant_id_constant(parser, name, strlen(name)); return node; @@ -2888,8 +3171,8 @@ static pm_call_node_t * pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY); - node->base.location = PM_LOCATION_TOKEN_VALUE(message); - node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->base.location = TOK2LOC(parser, message); + node->message_loc = TOK2LOC(parser, message); node->name = pm_parser_constant_id_token(parser, message); return node; @@ -2899,14 +3182,14 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) { * Returns whether or not this call can be used on the left-hand side of an * operator assignment. */ -static inline bool +static PRISM_INLINE bool pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) { return ( - (node->message_loc.start != NULL) && - (node->message_loc.end[-1] != '!') && - (node->message_loc.end[-1] != '?') && - char_is_identifier_start(parser, node->message_loc.start, parser->end - node->message_loc.start) && - (node->opening_loc.start == NULL) && + (node->message_loc.length > 0) && + (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') && + (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') && + char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) && + (node->opening_loc.length == 0) && (node->arguments == NULL) && (node->block == NULL) ); @@ -2922,10 +3205,10 @@ pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, p if (write_constant->length > 0) { size_t length = write_constant->length - 1; - void *memory = xmalloc(length); + uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1); memcpy(memory, write_constant->start, length); - *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length); + *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length); } else { // We can get here if the message was missing because of a syntax error. *read_name = pm_parser_constant_id_constant(parser, "", 0); @@ -2939,33 +3222,25 @@ static pm_call_and_write_node_t * pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(target->block == NULL); assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t); - *node = (pm_call_and_write_node_t) { - { - .type = PM_CALL_AND_WRITE_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .message_loc = target->message_loc, - .read_name = 0, - .write_name = target->name, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; + pm_call_and_write_node_t *node = pm_call_and_write_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODES(target, value), + target->receiver, + target->call_operator_loc, + target->message_loc, + 0, + target->name, + TOK2LOC(parser, operator), + value + ); pm_call_write_read_name_init(parser, &node->read_name, &node->write_name); - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -2976,7 +3251,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const */ static void pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) { - if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) { + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) { if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) { pm_node_t *node; PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) { @@ -2999,35 +3274,28 @@ pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *argumen static pm_index_and_write_node_t * pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t); pm_index_arguments_check(parser, target->arguments, target->block); assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); - *node = (pm_index_and_write_node_t) { - { - .type = PM_INDEX_AND_WRITE_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .opening_loc = target->opening_loc, - .arguments = target->arguments, - .closing_loc = target->closing_loc, - .block = (pm_block_argument_node_t *) target->block, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + pm_index_and_write_node_t *node = pm_index_and_write_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODES(target, value), + target->receiver, + target->call_operator_loc, + target->opening_loc, + target->arguments, + target->closing_loc, + (pm_block_argument_node_t *) target->block, + TOK2LOC(parser, operator), + value + ); + + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3038,34 +3306,26 @@ pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, cons static pm_call_operator_write_node_t * pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(target->block == NULL); - pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t); - *node = (pm_call_operator_write_node_t) { - { - .type = PM_CALL_OPERATOR_WRITE_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .message_loc = target->message_loc, - .read_name = 0, - .write_name = target->name, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; + pm_call_operator_write_node_t *node = pm_call_operator_write_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODES(target, value), + target->receiver, + target->call_operator_loc, + target->message_loc, + 0, + target->name, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + TOK2LOC(parser, operator), + value + ); pm_call_write_read_name_init(parser, &node->read_name, &node->write_name); - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3075,36 +3335,28 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, */ static pm_index_operator_write_node_t * pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t); - pm_index_arguments_check(parser, target->arguments, target->block); assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); - *node = (pm_index_operator_write_node_t) { - { - .type = PM_INDEX_OPERATOR_WRITE_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .opening_loc = target->opening_loc, - .arguments = target->arguments, - .closing_loc = target->closing_loc, - .block = (pm_block_argument_node_t *) target->block, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + pm_index_operator_write_node_t *node = pm_index_operator_write_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODES(target, value), + target->receiver, + target->call_operator_loc, + target->opening_loc, + target->arguments, + target->closing_loc, + (pm_block_argument_node_t *) target->block, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + TOK2LOC(parser, operator), + value + ); + + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3116,33 +3368,25 @@ static pm_call_or_write_node_t * pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(target->block == NULL); assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t); - *node = (pm_call_or_write_node_t) { - { - .type = PM_CALL_OR_WRITE_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .message_loc = target->message_loc, - .read_name = 0, - .write_name = target->name, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; + pm_call_or_write_node_t *node = pm_call_or_write_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODES(target, value), + target->receiver, + target->call_operator_loc, + target->message_loc, + 0, + target->name, + TOK2LOC(parser, operator), + value + ); pm_call_write_read_name_init(parser, &node->read_name, &node->write_name); - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3153,35 +3397,28 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const static pm_index_or_write_node_t * pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t); pm_index_arguments_check(parser, target->arguments, target->block); assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); - *node = (pm_index_or_write_node_t) { - { - .type = PM_INDEX_OR_WRITE_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .opening_loc = target->opening_loc, - .arguments = target->arguments, - .closing_loc = target->closing_loc, - .block = (pm_block_argument_node_t *) target->block, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + pm_index_or_write_node_t *node = pm_index_or_write_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODES(target, value), + target->receiver, + target->call_operator_loc, + target->opening_loc, + target->arguments, + target->closing_loc, + (pm_block_argument_node_t *) target->block, + TOK2LOC(parser, operator), + value + ); + + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3192,25 +3429,27 @@ pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const */ static pm_call_target_node_t * pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { - pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t); + pm_call_target_node_t *node = pm_call_target_node_new( + parser->arena, + ++parser->node_id, + FL(target), + PM_LOCATION_INIT_NODE(target), + target->receiver, + target->call_operator_loc, + target->name, + target->message_loc + ); - *node = (pm_call_target_node_t) { - { - .type = PM_CALL_TARGET_NODE, - .flags = target->base.flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = target->base.location - }, - .receiver = target->receiver, - .call_operator_loc = target->call_operator_loc, - .name = target->name, - .message_loc = target->message_loc - }; + /* It is possible to get here where we have parsed an invalid syntax tree + * where the call operator was not present. In that case we will have a + * problem because it is a required location. In this case we need to fill + * it in with a fake location so that the syntax tree remains valid. */ + if (node->call_operator_loc.length == 0) { + node->call_operator_loc = target->base.location; + } - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3221,30 +3460,23 @@ pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { */ static pm_index_target_node_t * pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { - pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t); - pm_node_flags_t flags = target->base.flags; - pm_index_arguments_check(parser, target->arguments, target->block); - assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE)); - *node = (pm_index_target_node_t) { - { - .type = PM_INDEX_TARGET_NODE, - .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = target->base.location - }, - .receiver = target->receiver, - .opening_loc = target->opening_loc, - .arguments = target->arguments, - .closing_loc = target->closing_loc, - .block = (pm_block_argument_node_t *) target->block, - }; - // Here we're going to free the target, since it is no longer necessary. - // However, we don't want to call `pm_node_destroy` because we want to keep - // around all of its children since we just reused them. - xfree(target); + pm_index_target_node_t *node = pm_index_target_node_new( + parser->arena, + ++parser->node_id, + FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE, + PM_LOCATION_INIT_NODE(target), + target->receiver, + target->opening_loc, + target->arguments, + target->closing_loc, + (pm_block_argument_node_t *) target->block + ); + + // The target is no longer necessary because we've reused its children. + // It is arena-allocated so no explicit free is needed. return node; } @@ -3254,23 +3486,15 @@ pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) { */ static pm_capture_pattern_node_t * pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) { - pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t); - - *node = (pm_capture_pattern_node_t) { - { - .type = PM_CAPTURE_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = value->location.start, - .end = target->base.location.end - }, - }, - .value = value, - .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_capture_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(value, target), + value, + target, + TOK2LOC(parser, operator) + ); } /** @@ -3278,36 +3502,28 @@ pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_v */ static pm_case_node_t * pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) { - pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t); - - *node = (pm_case_node_t) { - { - .type = PM_CASE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = case_keyword->start, - .end = end_keyword->end - }, - }, - .predicate = predicate, - .else_clause = NULL, - .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), - .conditions = { 0 } - }; - - return node; + return pm_case_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword), + predicate, + ((pm_node_list_t) { 0 }), + NULL, + TOK2LOC(parser, case_keyword), + NTOK2LOC(parser, end_keyword) + ); } /** * Append a new condition to a CaseNode node. */ static void -pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) { +pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) { assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE)); - pm_node_list_append(&node->conditions, condition); - node->base.location.end = condition->location.end; + pm_node_list_append(arena, &node->conditions, condition); + PM_NODE_LENGTH_SET_NODE(node, condition); } /** @@ -3316,53 +3532,45 @@ pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) { static void pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) { node->else_clause = else_clause; - node->base.location.end = else_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, else_clause); } /** * Set the end location for a CaseNode node. */ static void -pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) { - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); +pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** * Allocate and initialize a new CaseMatchNode node. */ static pm_case_match_node_t * -pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) { - pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t); - - *node = (pm_case_match_node_t) { - { - .type = PM_CASE_MATCH_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = case_keyword->start, - .end = end_keyword->end - }, - }, - .predicate = predicate, - .else_clause = NULL, - .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), - .conditions = { 0 } - }; - - return node; +pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) { + return pm_case_match_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, case_keyword), + predicate, + ((pm_node_list_t) { 0 }), + NULL, + TOK2LOC(parser, case_keyword), + ((pm_location_t) { 0 }) + ); } /** * Append a new condition to a CaseMatchNode node. */ static void -pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) { +pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) { assert(PM_NODE_TYPE_P(condition, PM_IN_NODE)); - pm_node_list_append(&node->conditions, condition); - node->base.location.end = condition->location.end; + pm_node_list_append(arena, &node->conditions, condition); + PM_NODE_LENGTH_SET_NODE(node, condition); } /** @@ -3371,16 +3579,16 @@ pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condi static void pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) { node->else_clause = else_clause; - node->base.location.end = else_clause->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, else_clause); } /** * Set the end location for a CaseMatchNode node. */ static void -pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) { - node->base.location.end = end_keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); +pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); + node->end_keyword_loc = TOK2LOC(parser, end_keyword); } /** @@ -3388,25 +3596,20 @@ pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_toke */ static pm_class_node_t * pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) { - pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t); - - *node = (pm_class_node_t) { - { - .type = PM_CLASS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { .start = class_keyword->start, .end = end_keyword->end }, - }, - .locals = *locals, - .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword), - .constant_path = constant_path, - .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator), - .superclass = superclass, - .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), - .name = pm_parser_constant_id_token(parser, name) - }; - - return node; + return pm_class_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword), + *locals, + TOK2LOC(parser, class_keyword), + constant_path, + NTOK2LOC(parser, inheritance_operator), + superclass, + body, + TOK2LOC(parser, end_keyword), + pm_parser_constant_id_token(parser, name) + ); } /** @@ -3415,24 +3618,17 @@ pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const p static pm_class_variable_and_write_node_t * pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t); - *node = (pm_class_variable_and_write_node_t) { - { - .type = PM_CLASS_VARIABLE_AND_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_class_variable_and_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value + ); } /** @@ -3440,25 +3636,17 @@ pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_r */ static pm_class_variable_operator_write_node_t * pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t); - - *node = (pm_class_variable_operator_write_node_t) { - { - .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) - }; - - return node; + return pm_class_variable_operator_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) + ); } /** @@ -3467,24 +3655,17 @@ pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_varia static pm_class_variable_or_write_node_t * pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t); - - *node = (pm_class_variable_or_write_node_t) { - { - .type = PM_CLASS_VARIABLE_OR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - return node; + return pm_class_variable_or_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value + ); } /** @@ -3493,18 +3674,14 @@ pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_re static pm_class_variable_read_node_t * pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_CLASS_VARIABLE); - pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t); - - *node = (pm_class_variable_read_node_t) { - { - .type = PM_CLASS_VARIABLE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .name = pm_parser_constant_id_token(parser, token) - }; - return node; + return pm_class_variable_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token), + pm_parser_constant_id_token(parser, token) + ); } /** @@ -3513,9 +3690,9 @@ pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) * a = *b * a = 1, 2, 3 */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) { - if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) { + if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) { return flags; } return 0; @@ -3526,25 +3703,16 @@ pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) { */ static pm_class_variable_write_node_t * pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) { - pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t); - - *node = (pm_class_variable_write_node_t) { - { - .type = PM_CLASS_VARIABLE_WRITE_NODE, - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = read_node->base.location.start, - .end = value->location.end - }, - }, - .name = read_node->name, - .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_class_variable_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + PM_LOCATION_INIT_NODES(read_node, value), + read_node->name, + read_node->base.location, + value, + TOK2LOC(parser, operator) + ); } /** @@ -3553,23 +3721,16 @@ pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_ static pm_constant_path_and_write_node_t * pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t); - - *node = (pm_constant_path_and_write_node_t) { - { - .type = PM_CONSTANT_PATH_AND_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - return node; + return pm_constant_path_and_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target, + TOK2LOC(parser, operator), + value + ); } /** @@ -3577,24 +3738,16 @@ pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_nod */ static pm_constant_path_operator_write_node_t * pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t); - - *node = (pm_constant_path_operator_write_node_t) { - { - .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .target = target, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) - }; - - return node; + return pm_constant_path_operator_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target, + TOK2LOC(parser, operator), + value, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) + ); } /** @@ -3603,23 +3756,16 @@ pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_pat static pm_constant_path_or_write_node_t * pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t); - *node = (pm_constant_path_or_write_node_t) { - { - .type = PM_CONSTANT_PATH_OR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .target = target, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_constant_path_or_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target, + TOK2LOC(parser, operator), + value + ); } /** @@ -3628,29 +3774,22 @@ pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node static pm_constant_path_node_t * pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) { pm_assert_value_expression(parser, parent); - pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t); pm_constant_id_t name = PM_CONSTANT_ID_UNSET; if (name_token->type == PM_TOKEN_CONSTANT) { name = pm_parser_constant_id_token(parser, name_token); } - *node = (pm_constant_path_node_t) { - { - .type = PM_CONSTANT_PATH_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = parent == NULL ? delimiter->start : parent->location.start, - .end = name_token->end - }, - }, - .parent = parent, - .name = name, - .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter), - .name_loc = PM_LOCATION_TOKEN_VALUE(name_token) - }; - - return node; + return pm_constant_path_node_new( + parser->arena, + ++parser->node_id, + 0, + (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token), + parent, + name, + TOK2LOC(parser, delimiter), + TOK2LOC(parser, name_token) + ); } /** @@ -3658,24 +3797,15 @@ pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_to */ static pm_constant_path_write_node_t * pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t); - - *node = (pm_constant_path_write_node_t) { - { - .type = PM_CONSTANT_PATH_WRITE_NODE, - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - }, - }, - .target = target, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_constant_path_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + PM_LOCATION_INIT_NODES(target, value), + target, + TOK2LOC(parser, operator), + value + ); } /** @@ -3684,24 +3814,17 @@ pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t static pm_constant_and_write_node_t * pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t); - *node = (pm_constant_and_write_node_t) { - { - .type = PM_CONSTANT_AND_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_constant_and_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value + ); } /** @@ -3709,25 +3832,17 @@ pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t * */ static pm_constant_operator_write_node_t * pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t); - - *node = (pm_constant_operator_write_node_t) { - { - .type = PM_CONSTANT_OPERATOR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) - }; - - return node; + return pm_constant_operator_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) + ); } /** @@ -3736,24 +3851,17 @@ pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_nod static pm_constant_or_write_node_t * pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t); - - *node = (pm_constant_or_write_node_t) { - { - .type = PM_CONSTANT_OR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - return node; + return pm_constant_or_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value + ); } /** @@ -3761,19 +3869,15 @@ pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *t */ static pm_constant_read_node_t * pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) { - assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING); - pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t); - - *node = (pm_constant_read_node_t) { - { - .type = PM_CONSTANT_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name) - }, - .name = pm_parser_constant_id_token(parser, name) - }; - - return node; + assert(name->type == PM_TOKEN_CONSTANT || name->type == 0); + + return pm_constant_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + pm_parser_constant_id_token(parser, name) + ); } /** @@ -3781,25 +3885,16 @@ pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) { */ static pm_constant_write_node_t * pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t); - - *node = (pm_constant_write_node_t) { - { - .type = PM_CONSTANT_WRITE_NODE, - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_constant_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + value, + TOK2LOC(parser, operator) + ); } /** @@ -3810,7 +3905,7 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_BEGIN_NODE: { const pm_begin_node_t *cast = (pm_begin_node_t *) node; - if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements); + if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements)); break; } case PM_PARENTHESES_NODE: { @@ -3865,65 +3960,45 @@ pm_def_node_create( const pm_token_t *equal, const pm_token_t *end_keyword ) { - pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t); - const uint8_t *end; - - if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) { - end = body->location.end; - } else { - end = end_keyword->end; - } - - if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) { + if (receiver != NULL) { pm_def_node_receiver_check(parser, receiver); } - *node = (pm_def_node_t) { - { - .type = PM_DEF_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { .start = def_keyword->start, .end = end }, - }, - .name = name, - .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc), - .receiver = receiver, - .parameters = parameters, - .body = body, - .locals = *locals, - .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), - .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), - .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal), - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) - }; - - return node; + return pm_def_node_new( + parser->arena, + ++parser->node_id, + 0, + (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword), + name, + TOK2LOC(parser, name_loc), + receiver, + parameters, + body, + *locals, + TOK2LOC(parser, def_keyword), + NTOK2LOC(parser, operator), + NTOK2LOC(parser, lparen), + NTOK2LOC(parser, rparen), + NTOK2LOC(parser, equal), + NTOK2LOC(parser, end_keyword) + ); } /** * Allocate a new DefinedNode node. */ static pm_defined_node_t * -pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) { - pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t); - - *node = (pm_defined_node_t) { - { - .type = PM_DEFINED_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword_loc->start, - .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end) - }, - }, - .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), - .value = value, - .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), - .keyword_loc = *keyword_loc - }; - - return node; +pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) { + return pm_defined_node_new( + parser->arena, + ++parser->node_id, + 0, + (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen), + NTOK2LOC(parser, lparen), + value, + NTOK2LOC(parser, rparen), + TOK2LOC(parser, keyword) + ); } /** @@ -3931,29 +4006,15 @@ pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t */ static pm_else_node_t * pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) { - pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t); - const uint8_t *end = NULL; - if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) { - end = statements->base.location.end; - } else { - end = end_keyword->end; - } - - *node = (pm_else_node_t) { - { - .type = PM_ELSE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = else_keyword->start, - .end = end, - }, - }, - .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword), - .statements = statements, - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) - }; - - return node; + return pm_else_node_new( + parser->arena, + ++parser->node_id, + 0, + ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword), + TOK2LOC(parser, else_keyword), + statements, + NTOK2LOC(parser, end_keyword) + ); } /** @@ -3961,23 +4022,15 @@ pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_stat */ static pm_embedded_statements_node_t * pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) { - pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t); - - *node = (pm_embedded_statements_node_t) { - { - .type = PM_EMBEDDED_STATEMENTS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - } - }, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .statements = statements, - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) - }; - - return node; + return pm_embedded_statements_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + TOK2LOC(parser, opening), + statements, + TOK2LOC(parser, closing) + ); } /** @@ -3985,22 +4038,14 @@ pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *openin */ static pm_embedded_variable_node_t * pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) { - pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t); - - *node = (pm_embedded_variable_node_t) { - { - .type = PM_EMBEDDED_VARIABLE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = variable->location.end - } - }, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .variable = variable - }; - - return node; + return pm_embedded_variable_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable), + TOK2LOC(parser, operator), + variable + ); } /** @@ -4008,23 +4053,15 @@ pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator */ static pm_ensure_node_t * pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) { - pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t); - - *node = (pm_ensure_node_t) { - { - .type = PM_ENSURE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = ensure_keyword->start, - .end = end_keyword->end - }, - }, - .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword), - .statements = statements, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) - }; - - return node; + return pm_ensure_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword), + TOK2LOC(parser, ensure_keyword), + statements, + TOK2LOC(parser, end_keyword) + ); } /** @@ -4033,16 +4070,13 @@ pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_ static pm_false_node_t * pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_FALSE); - pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t); - *node = (pm_false_node_t) {{ - .type = PM_FALSE_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - - return node; + return pm_false_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -4051,50 +4085,31 @@ pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_find_pattern_node_t * pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) { - pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t); - + assert(nodes->size >= 2); pm_node_t *left = nodes->nodes[0]; - assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE)); - pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left; - - pm_node_t *right; + pm_node_t *right = nodes->nodes[nodes->size - 1]; - if (nodes->size == 1) { - right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end); - } else { - right = nodes->nodes[nodes->size - 1]; - assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE)); - } - -#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS - // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode. - // The resulting AST will anyway be ignored, but this file still needs to compile. - pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node; -#else - pm_node_t *right_splat_node = right; -#endif - *node = (pm_find_pattern_node_t) { - { - .type = PM_FIND_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = left->location.start, - .end = right->location.end, - }, - }, - .constant = NULL, - .left = left_splat_node, - .right = right_splat_node, - .requireds = { 0 }, - .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; + assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE)); + assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE)); + + pm_find_pattern_node_t *node = pm_find_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(left, right), + NULL, + (pm_splat_node_t *) left, + ((pm_node_list_t) { 0 }), + (pm_splat_node_t *) right, + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }) + ); // For now we're going to just copy over each pointer manually. This could be // much more efficient, as we could instead resize the node list to only point // to 1...-1. for (size_t index = 1; index < nodes->size - 1; index++) { - pm_node_list_append(&node->requireds, nodes->nodes[index]); + pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]); } return node; @@ -4111,7 +4126,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { // First, get a buffer of the content. size_t length = (size_t) diff; - char *buffer = xmalloc(sizeof(char) * (length + 1)); + const size_t buffer_size = sizeof(char) * (length + 1); + char *buffer = xmalloc(buffer_size); memcpy((void *) buffer, token->start, length); // Next, determine if we need to replace the decimal point because of @@ -4145,8 +4161,8 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { // This should never happen, because we've already checked that the token // is in a valid format. However it's good to be safe. if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE); - xfree((void *) buffer); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE); + xfree_sized(buffer, buffer_size); return 0.0; } @@ -4164,12 +4180,12 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { ellipsis = ""; } - pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); + pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis); value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL; } // Finally we can free the buffer and return the value. - xfree((void *) buffer); + xfree_sized(buffer, buffer_size); return value; } @@ -4179,19 +4195,14 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) { static pm_float_node_t * pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_FLOAT); - pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t); - *node = (pm_float_node_t) { - { - .type = PM_FLOAT_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .value = pm_double_parse(parser, token) - }; - - return node; + return pm_float_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + pm_double_parse(parser, token) + ); } /** @@ -4201,22 +4212,17 @@ static pm_imaginary_node_t * pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_FLOAT_IMAGINARY); - pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); - *node = (pm_imaginary_node_t) { - { - .type = PM_IMAGINARY_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) { + return pm_imaginary_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + UP(pm_float_node_create(parser, &((pm_token_t) { .type = PM_TOKEN_FLOAT, .start = token->start, .end = token->end - 1 - })) - }; - - return node; + }))) + ); } /** @@ -4226,17 +4232,14 @@ static pm_rational_node_t * pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_FLOAT_RATIONAL); - pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t); - *node = (pm_rational_node_t) { - { - .type = PM_RATIONAL_NODE, - .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .numerator = { 0 }, - .denominator = { 0 } - }; + pm_rational_node_t *node = pm_rational_node_new( + parser->arena, + ++parser->node_id, + PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + ((pm_integer_t) { 0 }), + ((pm_integer_t) { 0 }) + ); const uint8_t *start = token->start; const uint8_t *end = token->end - 1; // r @@ -4253,7 +4256,7 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) { const uint8_t *point = memchr(start, '.', length); assert(point && "should have a decimal point"); - uint8_t *digits = malloc(length); + uint8_t *digits = xmalloc(length); if (digits == NULL) { fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr); abort(); @@ -4263,12 +4266,18 @@ pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) { memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1)); pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1); + size_t fract_length = 0; + for (const uint8_t *fract = point; fract < end; ++fract) { + if (*fract != '_') ++fract_length; + } digits[0] = '1'; - if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1)); - pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point)); - free(digits); + if (fract_length > 1) memset(digits + 1, '0', fract_length - 1); + pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length); + xfree_sized(digits, length); pm_integers_reduce(&node->numerator, &node->denominator); + pm_integer_arena_move(parser->arena, &node->numerator); + pm_integer_arena_move(parser->arena, &node->denominator); return node; } @@ -4280,22 +4289,17 @@ static pm_imaginary_node_t * pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY); - pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); - *node = (pm_imaginary_node_t) { - { - .type = PM_IMAGINARY_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) { + return pm_imaginary_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + UP(pm_float_node_rational_create(parser, &((pm_token_t) { .type = PM_TOKEN_FLOAT_RATIONAL, .start = token->start, .end = token->end - 1 - })) - }; - - return node; + }))) + ); } /** @@ -4312,27 +4316,19 @@ pm_for_node_create( const pm_token_t *do_keyword, const pm_token_t *end_keyword ) { - pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t); - - *node = (pm_for_node_t) { - { - .type = PM_FOR_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = for_keyword->start, - .end = end_keyword->end - }, - }, - .index = index, - .collection = collection, - .statements = statements, - .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword), - .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) - }; - - return node; + return pm_for_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword), + index, + collection, + statements, + TOK2LOC(parser, for_keyword), + TOK2LOC(parser, in_keyword), + NTOK2LOC(parser, do_keyword), + TOK2LOC(parser, end_keyword) + ); } /** @@ -4341,15 +4337,13 @@ pm_for_node_create( static pm_forwarding_arguments_node_t * pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_UDOT_DOT_DOT); - pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t); - - *node = (pm_forwarding_arguments_node_t) {{ - .type = PM_FORWARDING_ARGUMENTS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - return node; + return pm_forwarding_arguments_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -4358,15 +4352,13 @@ pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token static pm_forwarding_parameter_node_t * pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_UDOT_DOT_DOT); - pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t); - - *node = (pm_forwarding_parameter_node_t) {{ - .type = PM_FORWARDING_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - return node; + return pm_forwarding_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -4376,26 +4368,20 @@ static pm_forwarding_super_node_t * pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) { assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE)); assert(token->type == PM_TOKEN_KEYWORD_SUPER); - pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t); pm_block_node_t *block = NULL; if (arguments->block != NULL) { block = (pm_block_node_t *) arguments->block; } - *node = (pm_forwarding_super_node_t) { - { - .type = PM_FORWARDING_SUPER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = token->start, - .end = block != NULL ? block->base.location.end : token->end - }, - }, - .block = block - }; - - return node; + return pm_forwarding_super_node_new( + parser->arena, + ++parser->node_id, + 0, + (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block), + PM_LOCATION_INIT_TOKEN(parser, token), + block + ); } /** @@ -4404,25 +4390,17 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm */ static pm_hash_pattern_node_t * pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) { - pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t); - - *node = (pm_hash_pattern_node_t) { - { - .type = PM_HASH_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - }, - }, - .constant = NULL, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), - .elements = { 0 }, - .rest = NULL - }; - - return node; + return pm_hash_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + NULL, + ((pm_node_list_t) { 0 }), + NULL, + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } /** @@ -4430,46 +4408,36 @@ pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening */ static pm_hash_pattern_node_t * pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) { - pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t); - - const uint8_t *start; - const uint8_t *end; + uint32_t start; + uint32_t end; if (elements->size > 0) { if (rest) { - start = elements->nodes[0]->location.start; - end = rest->location.end; + start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0])); + end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1])); } else { - start = elements->nodes[0]->location.start; - end = elements->nodes[elements->size - 1]->location.end; + start = PM_NODE_START(elements->nodes[0]); + end = PM_NODE_END(elements->nodes[elements->size - 1]); } } else { assert(rest != NULL); - start = rest->location.start; - end = rest->location.end; - } - - *node = (pm_hash_pattern_node_t) { - { - .type = PM_HASH_PATTERN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = start, - .end = end - }, - }, - .constant = NULL, - .elements = { 0 }, - .rest = rest, - .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - pm_node_t *element; - PM_NODE_LIST_FOREACH(elements, index, element) { - pm_node_list_append(&node->elements, element); - } + start = PM_NODE_START(rest); + end = PM_NODE_END(rest); + } + + pm_hash_pattern_node_t *node = pm_hash_pattern_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + NULL, + ((pm_node_list_t) { 0 }), + rest, + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }) + ); + pm_node_list_concat(parser->arena, &node->elements, elements); return node; } @@ -4486,7 +4454,7 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) { case PM_NUMBERED_REFERENCE_READ_NODE: // This will only ever happen in the event of a syntax error, but we // still need to provide something for the node. - return pm_parser_constant_id_location(parser, target->location.start, target->location.end); + return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target)); default: assert(false && "unreachable"); return (pm_constant_id_t) -1; @@ -4499,24 +4467,17 @@ pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) { static pm_global_variable_and_write_node_t * pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t); - - *node = (pm_global_variable_and_write_node_t) { - { - .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->location.start, - .end = value->location.end - } - }, - .name = pm_global_variable_write_name(parser, target), - .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - return node; + return pm_global_variable_and_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + pm_global_variable_write_name(parser, target), + target->location, + TOK2LOC(parser, operator), + value + ); } /** @@ -4524,25 +4485,17 @@ pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, */ static pm_global_variable_operator_write_node_t * pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t); - - *node = (pm_global_variable_operator_write_node_t) { - { - .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->location.start, - .end = value->location.end - } - }, - .name = pm_global_variable_write_name(parser, target), - .name_loc = target->location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) - }; - - return node; + return pm_global_variable_operator_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + pm_global_variable_write_name(parser, target), + target->location, + TOK2LOC(parser, operator), + value, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) + ); } /** @@ -4551,24 +4504,17 @@ pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *ta static pm_global_variable_or_write_node_t * pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t); - *node = (pm_global_variable_or_write_node_t) { - { - .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->location.start, - .end = value->location.end - } - }, - .name = pm_global_variable_write_name(parser, target), - .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_global_variable_or_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + pm_global_variable_write_name(parser, target), + target->location, + TOK2LOC(parser, operator), + value + ); } /** @@ -4576,18 +4522,13 @@ pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, */ static pm_global_variable_read_node_t * pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) { - pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t); - - *node = (pm_global_variable_read_node_t) { - { - .type = PM_GLOBAL_VARIABLE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name), - }, - .name = pm_parser_constant_id_token(parser, name) - }; - - return node; + return pm_global_variable_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + pm_parser_constant_id_token(parser, name) + ); } /** @@ -4595,18 +4536,13 @@ pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) */ static pm_global_variable_read_node_t * pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) { - pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t); - - *node = (pm_global_variable_read_node_t) { - { - .type = PM_GLOBAL_VARIABLE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser) - }, - .name = name - }; - - return node; + return pm_global_variable_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + name + ); } /** @@ -4614,25 +4550,16 @@ pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant */ static pm_global_variable_write_node_t * pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t); - - *node = (pm_global_variable_write_node_t) { - { - .type = PM_GLOBAL_VARIABLE_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .location = { - .start = target->location.start, - .end = value->location.end - }, - }, - .name = pm_global_variable_write_name(parser, target), - .name_loc = PM_LOCATION_NODE_VALUE(target), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_global_variable_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + PM_LOCATION_INIT_NODES(target, value), + pm_global_variable_write_name(parser, target), + target->location, + value, + TOK2LOC(parser, operator) + ); } /** @@ -4640,21 +4567,16 @@ pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, con */ static pm_global_variable_write_node_t * pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) { - pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t); - - *node = (pm_global_variable_write_node_t) { - { - .type = PM_GLOBAL_VARIABLE_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser) - }, - .name = name, - .name_loc = PM_LOCATION_NULL_VALUE(parser), - .operator_loc = PM_LOCATION_NULL_VALUE(parser), - .value = value - }; - - return node; + return pm_global_variable_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + name, + ((pm_location_t) { 0 }), + value, + ((pm_location_t) { 0 }) + ); } /** @@ -4663,29 +4585,24 @@ pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constan static pm_hash_node_t * pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) { assert(opening != NULL); - pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t); - - *node = (pm_hash_node_t) { - { - .type = PM_HASH_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(opening) - }, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_NULL_VALUE(parser), - .elements = { 0 } - }; - return node; + return pm_hash_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, opening), + TOK2LOC(parser, opening), + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }) + ); } /** * Append a new element to a hash node. */ -static inline void -pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) { - pm_node_list_append(&hash->elements, element); +static PRISM_INLINE void +pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) { + pm_node_list_append(arena, &hash->elements, element); bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE); if (static_literal) { @@ -4696,14 +4613,14 @@ pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) { } if (!static_literal) { - pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL); + pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL); } } -static inline void -pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) { - hash->base.location.end = token->end; - hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token); +static PRISM_INLINE void +pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) { + PM_NODE_LENGTH_SET_TOKEN(parser, hash, token); + hash->closing_loc = TOK2LOC(parser, token); } /** @@ -4719,38 +4636,32 @@ pm_if_node_create(pm_parser_t *parser, const pm_token_t *end_keyword ) { pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); - pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); - const uint8_t *end; - if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) { - end = end_keyword->end; + uint32_t start = PM_TOKEN_START(parser, if_keyword); + uint32_t end; + + if (end_keyword != NULL) { + end = PM_TOKEN_END(parser, end_keyword); } else if (subsequent != NULL) { - end = subsequent->location.end; + end = PM_NODE_END(subsequent); } else if (pm_statements_node_body_length(statements) != 0) { - end = statements->base.location.end; + end = PM_NODE_END(statements); } else { - end = predicate->location.end; - } - - *node = (pm_if_node_t) { - { - .type = PM_IF_NODE, - .flags = PM_NODE_FLAG_NEWLINE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = if_keyword->start, - .end = end - }, - }, - .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword), - .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword), - .statements = statements, - .subsequent = subsequent, - .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) - }; - - return node; + end = PM_NODE_END(predicate); + } + + return pm_if_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_NEWLINE, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + TOK2LOC(parser, if_keyword), + predicate, + NTOK2LOC(parser, then_keyword), + statements, + subsequent, + NTOK2LOC(parser, end_keyword) + ); } /** @@ -4759,30 +4670,22 @@ pm_if_node_create(pm_parser_t *parser, static pm_if_node_t * pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) { pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); - pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); pm_statements_node_t *statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, statements, statement, true); - *node = (pm_if_node_t) { - { - .type = PM_IF_NODE, - .flags = PM_NODE_FLAG_NEWLINE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = statement->location.start, - .end = predicate->location.end - }, - }, - .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword), - .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .statements = statements, - .subsequent = NULL, - .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; + return pm_if_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_NEWLINE, + PM_LOCATION_INIT_NODES(statement, predicate), + TOK2LOC(parser, if_keyword), + predicate, + ((pm_location_t) { 0 }), + statements, + NULL, + ((pm_location_t) { 0 }) + ); } /** @@ -4799,43 +4702,31 @@ pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_to pm_statements_node_t *else_statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, else_statements, false_expression, true); - pm_token_t end_keyword = not_provided(parser); - pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword); - - pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t); - - *node = (pm_if_node_t) { - { - .type = PM_IF_NODE, - .flags = PM_NODE_FLAG_NEWLINE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = predicate->location.start, - .end = false_expression->location.end, - }, - }, - .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .predicate = predicate, - .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark), - .statements = if_statements, - .subsequent = (pm_node_t *) else_node, - .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; - + pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL); + return pm_if_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_NEWLINE, + PM_LOCATION_INIT_NODES(predicate, false_expression), + ((pm_location_t) { 0 }), + predicate, + TOK2LOC(parser, qmark), + if_statements, + UP(else_node), + ((pm_location_t) { 0 }) + ); } -static inline void -pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) { - node->base.location.end = keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); +static PRISM_INLINE void +pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); + node->end_keyword_loc = TOK2LOC(parser, keyword); } -static inline void -pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) { - node->base.location.end = keyword->end; - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword); +static PRISM_INLINE void +pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword); + node->end_keyword_loc = TOK2LOC(parser, keyword); } /** @@ -4843,18 +4734,13 @@ pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword */ static pm_implicit_node_t * pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) { - pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t); - - *node = (pm_implicit_node_t) { - { - .type = PM_IMPLICIT_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = value->location - }, - .value = value - }; - - return node; + return pm_implicit_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODE(value), + value + ); } /** @@ -4864,17 +4750,12 @@ static pm_implicit_rest_node_t * pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_COMMA); - pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t); - - *node = (pm_implicit_rest_node_t) { - { - .type = PM_IMPLICIT_REST_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - } - }; - - return node; + return pm_implicit_rest_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -4883,28 +4764,33 @@ pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) { static pm_integer_node_t * pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) { assert(token->type == PM_TOKEN_INTEGER); - pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t); - *node = (pm_integer_node_t) { - { - .type = PM_INTEGER_NODE, - .flags = base | PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .value = { 0 } - }; + pm_integer_node_t *node = pm_integer_node_new( + parser->arena, + ++parser->node_id, + base | PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + ((pm_integer_t) { 0 }) + ); - pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; - switch (base) { - case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break; - case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break; - case PM_INTEGER_BASE_FLAGS_DECIMAL: break; - case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break; - default: assert(false && "unreachable"); break; + if (parser->integer.lexed) { + // The value was already computed during lexing. + node->value.value = parser->integer.value; + parser->integer.lexed = false; + } else { + pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; + switch (base) { + case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break; + case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break; + case PM_INTEGER_BASE_FLAGS_DECIMAL: break; + case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break; + default: assert(false && "unreachable"); break; + } + + pm_integer_parse(&node->value, integer_base, token->start, token->end); + pm_integer_arena_move(parser->arena, &node->value); } - pm_integer_parse(&node->value, integer_base, token->start, token->end); return node; } @@ -4916,22 +4802,17 @@ static pm_imaginary_node_t * pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) { assert(token->type == PM_TOKEN_INTEGER_IMAGINARY); - pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); - *node = (pm_imaginary_node_t) { - { - .type = PM_IMAGINARY_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) { + return pm_imaginary_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + UP(pm_integer_node_create(parser, base, &((pm_token_t) { .type = PM_TOKEN_INTEGER, .start = token->start, .end = token->end - 1 - })) - }; - - return node; + }))) + ); } /** @@ -4942,17 +4823,14 @@ static pm_rational_node_t * pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) { assert(token->type == PM_TOKEN_INTEGER_RATIONAL); - pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t); - *node = (pm_rational_node_t) { - { - .type = PM_RATIONAL_NODE, - .flags = base | PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .numerator = { 0 }, - .denominator = { .value = 1, 0 } - }; + pm_rational_node_t *node = pm_rational_node_new( + parser->arena, + ++parser->node_id, + base | PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + ((pm_integer_t) { 0 }), + ((pm_integer_t) { .value = 1 }) + ); pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL; switch (base) { @@ -4964,6 +4842,7 @@ pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const } pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1); + pm_integer_arena_move(parser->arena, &node->numerator); return node; } @@ -4976,22 +4855,17 @@ static pm_imaginary_node_t * pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) { assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY); - pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t); - *node = (pm_imaginary_node_t) { - { - .type = PM_IMAGINARY_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) { + return pm_imaginary_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token), + UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) { .type = PM_TOKEN_INTEGER_RATIONAL, .start = token->start, .end = token->end - 1 - })) - }; - - return node; + }))) + ); } /** @@ -4999,33 +4873,27 @@ pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t b */ static pm_in_node_t * pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) { - pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t); + uint32_t start = PM_TOKEN_START(parser, in_keyword); + uint32_t end; - const uint8_t *end; if (statements != NULL) { - end = statements->base.location.end; - } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) { - end = then_keyword->end; + end = PM_NODE_END(statements); + } else if (then_keyword != NULL) { + end = PM_TOKEN_END(parser, then_keyword); } else { - end = pattern->location.end; - } - - *node = (pm_in_node_t) { - { - .type = PM_IN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = in_keyword->start, - .end = end - }, - }, - .pattern = pattern, - .statements = statements, - .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword), - .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword) - }; - - return node; + end = PM_NODE_END(pattern); + } + + return pm_in_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + pattern, + statements, + TOK2LOC(parser, in_keyword), + NTOK2LOC(parser, then_keyword) + ); } /** @@ -5034,24 +4902,17 @@ pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t static pm_instance_variable_and_write_node_t * pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t); - *node = (pm_instance_variable_and_write_node_t) { - { - .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_instance_variable_and_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value + ); } /** @@ -5059,25 +4920,17 @@ pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_vari */ static pm_instance_variable_operator_write_node_t * pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t); - - *node = (pm_instance_variable_operator_write_node_t) { - { - .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1) - }; - - return node; + return pm_instance_variable_operator_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1) + ); } /** @@ -5086,24 +4939,17 @@ pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance static pm_instance_variable_or_write_node_t * pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) { assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t); - - *node = (pm_instance_variable_or_write_node_t) { - { - .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .name = target->name, - .name_loc = target->base.location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - return node; + return pm_instance_variable_or_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->name, + target->base.location, + TOK2LOC(parser, operator), + value + ); } /** @@ -5112,18 +4958,14 @@ pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_varia static pm_instance_variable_read_node_t * pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_INSTANCE_VARIABLE); - pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t); - - *node = (pm_instance_variable_read_node_t) { - { - .type = PM_INSTANCE_VARIABLE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .name = pm_parser_constant_id_token(parser, token) - }; - return node; + return pm_instance_variable_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token), + pm_parser_constant_id_token(parser, token) + ); } /** @@ -5132,24 +4974,16 @@ pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *tok */ static pm_instance_variable_write_node_t * pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) { - pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t); - *node = (pm_instance_variable_write_node_t) { - { - .type = PM_INSTANCE_VARIABLE_WRITE_NODE, - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = read_node->base.location.start, - .end = value->location.end - } - }, - .name = read_node->name, - .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node), - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_instance_variable_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + PM_LOCATION_INIT_NODES(read_node, value), + read_node->name, + read_node->base.location, + value, + TOK2LOC(parser, operator) + ); } /** @@ -5158,7 +4992,7 @@ pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable * literals. */ static void -pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) { +pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) { switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN); @@ -5186,14 +5020,14 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p break; } case PM_EMBEDDED_VARIABLE_NODE: - pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL); + pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL); break; default: assert(false && "unexpected node type"); break; } - pm_node_list_append(parts, part); + pm_node_list_append(arena, parts, part); } /** @@ -5201,43 +5035,34 @@ pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *p */ static pm_interpolated_regular_expression_node_t * pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) { - pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t); - - *node = (pm_interpolated_regular_expression_node_t) { - { - .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = NULL, - }, - }, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(opening), - .parts = { 0 } - }; - - return node; + return pm_interpolated_regular_expression_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, opening), + TOK2LOC(parser, opening), + ((pm_node_list_t) { 0 }), + TOK2LOC(parser, opening) + ); } -static inline void -pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) { - if (node->base.location.start > part->location.start) { - node->base.location.start = part->location.start; +static PRISM_INLINE void +pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) { + if (PM_NODE_START(node) > PM_NODE_START(part)) { + PM_NODE_START_SET_NODE(node, part); } - if (node->base.location.end < part->location.end) { - node->base.location.end = part->location.end; + if (PM_NODE_END(node) < PM_NODE_END(part)) { + PM_NODE_LENGTH_SET_NODE(node, part); } - pm_interpolated_node_append((pm_node_t *) node, &node->parts, part); + pm_interpolated_node_append(arena, UP(node), &node->parts, part); } -static inline void +static PRISM_INLINE void pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; - pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing)); + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); + pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing)); } /** @@ -5249,7 +5074,7 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte * PM_NODE_FLAG_STATIC_LITERAL indicates that the node should be treated as a * single static literal string that can be pushed onto the stack on its own. * Note that this doesn't necessarily mean that the string will be frozen or - * not; the instructions in CRuby will be either putobject or putstring, + * not; the instructions in CRuby will be either putobject, dupstring or dupchilledstring, * depending on the combination of `--enable-frozen-string-literal`, * `# frozen_string_literal: true`, and whether or not there is interpolation. * @@ -5263,22 +5088,31 @@ pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_inte * is necessary to indicate that the string should be left up to the runtime, * which could potentially use a chilled string otherwise. */ -static inline void -pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) { +static PRISM_INLINE void +pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) { + pm_arena_t *arena = parser->arena; #define CLEAR_FLAGS(node) \ - node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE)) + node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE)) #define MUTABLE_FLAGS(node) \ - node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN); + node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN); - if (node->parts.size == 0 && node->opening_loc.start == NULL) { - node->base.location.start = part->location.start; + if (node->parts.size == 0 && node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(node, part); } - node->base.location.end = MAX(node->base.location.end, part->location.end); + if (PM_NODE_END(part) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, part); + } switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: + // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags, + // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for + // as long as this interpolation only consists of other string literals. + if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) { + pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL); + } part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE); break; case PM_INTERPOLATED_STRING_NODE: @@ -5328,12 +5162,24 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ // should clear the mutability flags. CLEAR_FLAGS(node); break; + case PM_X_STRING_NODE: + case PM_INTERPOLATED_X_STRING_NODE: + case PM_SYMBOL_NODE: + case PM_INTERPOLATED_SYMBOL_NODE: + // These will only happen in error cases. But we want to handle it + // here so that we don't fail the assertion. + CLEAR_FLAGS(node); + pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part))); + return; + case PM_ERROR_RECOVERY_NODE: + CLEAR_FLAGS(node); + break; default: assert(false && "unexpected node type"); break; } - pm_node_list_append(&node->parts, part); + pm_node_list_append(arena, &node->parts, part); #undef CLEAR_FLAGS #undef MUTABLE_FLAGS @@ -5344,7 +5190,6 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ */ static pm_interpolated_string_node_t * pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) { - pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t); pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL; switch (parser->frozen_string_literal) { @@ -5356,25 +5201,23 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin break; } - *node = (pm_interpolated_string_node_t) { - { - .type = PM_INTERPOLATED_STRING_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end, - }, - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .parts = { 0 } - }; + uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing); + + pm_interpolated_string_node_t *node = pm_interpolated_string_node_new( + parser->arena, + ++parser->node_id, + flags, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + NTOK2LOC(parser, opening), + ((pm_node_list_t) { 0 }), + NTOK2LOC(parser, closing) + ); if (parts != NULL) { pm_node_t *part; PM_NODE_LIST_FOREACH(parts, index, part) { - pm_interpolated_string_node_append(node, part); + pm_interpolated_string_node_append(parser, node, part); } } @@ -5385,25 +5228,28 @@ pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *openin * Set the closing token of the given InterpolatedStringNode node. */ static void -pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } static void -pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) { - if (node->parts.size == 0 && node->opening_loc.start == NULL) { - node->base.location.start = part->location.start; +pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) { + if (node->parts.size == 0 && node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(node, part); } - pm_interpolated_node_append((pm_node_t *) node, &node->parts, part); - node->base.location.end = MAX(node->base.location.end, part->location.end); + pm_interpolated_node_append(arena, UP(node), &node->parts, part); + + if (PM_NODE_END(part) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, part); + } } static void -pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } /** @@ -5411,27 +5257,23 @@ pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, */ static pm_interpolated_symbol_node_t * pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) { - pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t); - - *node = (pm_interpolated_symbol_node_t) { - { - .type = PM_INTERPOLATED_SYMBOL_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end, - }, - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .parts = { 0 } - }; + uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing); + + pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + NTOK2LOC(parser, opening), + ((pm_node_list_t) { 0 }), + NTOK2LOC(parser, closing) + ); if (parts != NULL) { pm_node_t *part; PM_NODE_LIST_FOREACH(parts, index, part) { - pm_interpolated_symbol_node_append(node, part); + pm_interpolated_symbol_node_append(parser->arena, node, part); } } @@ -5443,35 +5285,27 @@ pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *openin */ static pm_interpolated_x_string_node_t * pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) { - pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t); - - *node = (pm_interpolated_x_string_node_t) { - { - .type = PM_INTERPOLATED_X_STRING_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - }, - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .parts = { 0 } - }; - - return node; + return pm_interpolated_x_string_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + TOK2LOC(parser, opening), + ((pm_node_list_t) { 0 }), + TOK2LOC(parser, closing) + ); } -static inline void -pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) { - pm_interpolated_node_append((pm_node_t *) node, &node->parts, part); - node->base.location.end = part->location.end; +static PRISM_INLINE void +pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) { + pm_interpolated_node_append(arena, UP(node), &node->parts, part); + PM_NODE_LENGTH_SET_NODE(node, part); } -static inline void -pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { - node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing); - node->base.location.end = closing->end; +static PRISM_INLINE void +pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) { + node->closing_loc = TOK2LOC(parser, closing); + PM_NODE_LENGTH_SET_TOKEN(parser, node, closing); } /** @@ -5479,17 +5313,12 @@ pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, */ static pm_it_local_variable_read_node_t * pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) { - pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t); - - *node = (pm_it_local_variable_read_node_t) { - { - .type = PM_IT_LOCAL_VARIABLE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name) - } - }; - - return node; + return pm_it_local_variable_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name) + ); } /** @@ -5497,20 +5326,12 @@ pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *nam */ static pm_it_parameters_node_t * pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) { - pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t); - - *node = (pm_it_parameters_node_t) { - { - .type = PM_IT_PARAMETERS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - } - } - }; - - return node; + return pm_it_parameters_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing) + ); } /** @@ -5518,37 +5339,31 @@ pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, con */ static pm_keyword_hash_node_t * pm_keyword_hash_node_create(pm_parser_t *parser) { - pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t); - - *node = (pm_keyword_hash_node_t) { - .base = { - .type = PM_KEYWORD_HASH_NODE, - .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }, - .elements = { 0 } - }; - - return node; + return pm_keyword_hash_node_new( + parser->arena, + ++parser->node_id, + PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS, + PM_LOCATION_INIT_UNSET, + ((pm_node_list_t) { 0 }) + ); } /** * Append an element to a KeywordHashNode node. */ static void -pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) { +pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) { // If the element being added is not an AssocNode or does not have a symbol // key, then we want to turn the SYMBOL_KEYS flag off. if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) { - pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS); + pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS); } - pm_node_list_append(&hash->elements, element); - if (hash->base.location.start == NULL) { - hash->base.location.start = element->location.start; + pm_node_list_append(arena, &hash->elements, element); + if (PM_NODE_LENGTH(hash) == 0) { + PM_NODE_START_SET_NODE(hash, element); } - hash->base.location.end = element->location.end; + PM_NODE_LENGTH_SET_NODE(hash, element); } /** @@ -5556,22 +5371,14 @@ pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *el */ static pm_required_keyword_parameter_node_t * pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) { - pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t); - - *node = (pm_required_keyword_parameter_node_t) { - { - .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = name->start, - .end = name->end - }, - }, - .name = pm_parser_constant_id_location(parser, name->start, name->end - 1), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), - }; - - return node; + return pm_required_keyword_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + pm_parser_constant_id_raw(parser, name->start, name->end - 1), + TOK2LOC(parser, name) + ); } /** @@ -5579,23 +5386,15 @@ pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t */ static pm_optional_keyword_parameter_node_t * pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) { - pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t); - - *node = (pm_optional_keyword_parameter_node_t) { - { - .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = name->start, - .end = value->location.end - }, - }, - .name = pm_parser_constant_id_location(parser, name->start, name->end - 1), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), - .value = value - }; - - return node; + return pm_optional_keyword_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN_NODE(parser, name, value), + pm_parser_constant_id_raw(parser, name->start, name->end - 1), + TOK2LOC(parser, name), + value + ); } /** @@ -5603,23 +5402,15 @@ pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t */ static pm_keyword_rest_parameter_node_t * pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) { - pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t); - - *node = (pm_keyword_rest_parameter_node_t) { - { - .type = PM_KEYWORD_REST_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end) - }, - }, - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_keyword_rest_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name), + name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + NTOK2LOC(parser, name), + TOK2LOC(parser, operator) + ); } /** @@ -5635,26 +5426,18 @@ pm_lambda_node_create( pm_node_t *parameters, pm_node_t *body ) { - pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t); - - *node = (pm_lambda_node_t) { - { - .type = PM_LAMBDA_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = closing->end - }, - }, - .locals = *locals, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), - .parameters = parameters, - .body = body - }; - - return node; + return pm_lambda_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, operator, closing), + *locals, + TOK2LOC(parser, operator), + TOK2LOC(parser, opening), + TOK2LOC(parser, closing), + parameters, + body + ); } /** @@ -5664,25 +5447,18 @@ static pm_local_variable_and_write_node_t * pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) { assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE)); assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL); - pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t); - - *node = (pm_local_variable_and_write_node_t) { - { - .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->location.start, - .end = value->location.end - } - }, - .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .name = name, - .depth = depth - }; - return node; + return pm_local_variable_and_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->location, + TOK2LOC(parser, operator), + value, + name, + depth + ); } /** @@ -5690,26 +5466,18 @@ pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, */ static pm_local_variable_operator_write_node_t * pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) { - pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t); - - *node = (pm_local_variable_operator_write_node_t) { - { - .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->location.start, - .end = value->location.end - } - }, - .name_loc = target->location, - .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .name = name, - .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1), - .depth = depth - }; - - return node; + return pm_local_variable_operator_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->location, + TOK2LOC(parser, operator), + value, + name, + pm_parser_constant_id_raw(parser, operator->start, operator->end - 1), + depth + ); } /** @@ -5719,25 +5487,18 @@ static pm_local_variable_or_write_node_t * pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) { assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE)); assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL); - pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t); - - *node = (pm_local_variable_or_write_node_t) { - { - .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->location.start, - .end = value->location.end - } - }, - .name_loc = target->location, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value, - .name = name, - .depth = depth - }; - return node; + return pm_local_variable_or_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(target, value), + target->location, + TOK2LOC(parser, operator), + value, + name, + depth + ); } /** @@ -5747,19 +5508,14 @@ static pm_local_variable_read_node_t * pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) { if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id); - pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t); - - *node = (pm_local_variable_read_node_t) { - { - .type = PM_LOCAL_VARIABLE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name) - }, - .name = name_id, - .depth = depth - }; - - return node; + return pm_local_variable_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + name_id, + depth + ); } /** @@ -5786,32 +5542,23 @@ pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t */ static pm_local_variable_write_node_t * pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) { - pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t); - - *node = (pm_local_variable_write_node_t) { - { - .type = PM_LOCAL_VARIABLE_WRITE_NODE, - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = name_loc->start, - .end = value->location.end - } - }, - .name = name, - .depth = depth, - .value = value, - .name_loc = *name_loc, - .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_local_variable_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }), + name, + depth, + *name_loc, + value, + TOK2LOC(parser, operator) + ); } /** * Returns true if the given bounds comprise `it`. */ -static inline bool +static PRISM_INLINE bool pm_token_is_it(const uint8_t *start, const uint8_t *end) { return (end - start == 2) && (start[0] == 'i') && (start[1] == 't'); } @@ -5820,19 +5567,24 @@ pm_token_is_it(const uint8_t *start, const uint8_t *end) { * Returns true if the given bounds comprise a numbered parameter (i.e., they * are of the form /^_\d$/). */ -static inline bool -pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { - return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1])); +static PRISM_INLINE bool +pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) { + return ( + (length == 2) && + (parser->start[start] == '_') && + (parser->start[start + 1] != '0') && + pm_char_is_decimal_digit(parser->start[start + 1]) + ); } /** * Ensure the given bounds do not comprise a numbered parameter. If they do, add * an appropriate error message to the parser. */ -static inline void -pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { - if (pm_token_is_numbered_parameter(start, end)) { - PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start); +static PRISM_INLINE void +pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) { + if (pm_token_is_numbered_parameter(parser, start, length)) { + PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start); } } @@ -5842,20 +5594,16 @@ pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const ui */ static pm_local_variable_target_node_t * pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) { - pm_refute_numbered_parameter(parser, location->start, location->end); - pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t); - - *node = (pm_local_variable_target_node_t) { - { - .type = PM_LOCAL_VARIABLE_TARGET_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = *location - }, - .name = name, - .depth = depth - }; - - return node; + pm_refute_numbered_parameter(parser, location->start, location->length); + + return pm_local_variable_target_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = location->start, .length = location->length }), + name, + depth + ); } /** @@ -5865,23 +5613,15 @@ static pm_match_predicate_node_t * pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) { pm_assert_value_expression(parser, value); - pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t); - - *node = (pm_match_predicate_node_t) { - { - .type = PM_MATCH_PREDICATE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = value->location.start, - .end = pattern->location.end - } - }, - .value = value, - .pattern = pattern, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_match_predicate_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(value, pattern), + value, + pattern, + TOK2LOC(parser, operator) + ); } /** @@ -5891,23 +5631,15 @@ static pm_match_required_node_t * pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) { pm_assert_value_expression(parser, value); - pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t); - - *node = (pm_match_required_node_t) { - { - .type = PM_MATCH_REQUIRED_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = value->location.start, - .end = pattern->location.end - } - }, - .value = value, - .pattern = pattern, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_match_required_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(value, pattern), + value, + pattern, + TOK2LOC(parser, operator) + ); } /** @@ -5915,19 +5647,14 @@ pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t * */ static pm_match_write_node_t * pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) { - pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t); - - *node = (pm_match_write_node_t) { - { - .type = PM_MATCH_WRITE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = call->base.location - }, - .call = call, - .targets = { 0 } - }; - - return node; + return pm_match_write_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODE(call), + call, + ((pm_node_list_t) { 0 }) + ); } /** @@ -5935,26 +5662,18 @@ pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) { */ static pm_module_node_t * pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) { - pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t); - - *node = (pm_module_node_t) { - { - .type = PM_MODULE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = module_keyword->start, - .end = end_keyword->end - } - }, - .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals), - .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword), - .constant_path = constant_path, - .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword), - .name = pm_parser_constant_id_token(parser, name) - }; - - return node; + return pm_module_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword), + (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals), + TOK2LOC(parser, module_keyword), + constant_path, + body, + TOK2LOC(parser, end_keyword), + pm_parser_constant_id_token(parser, name) + ); } /** @@ -5962,22 +5681,17 @@ pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const */ static pm_multi_target_node_t * pm_multi_target_node_create(pm_parser_t *parser) { - pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t); - - *node = (pm_multi_target_node_t) { - { - .type = PM_MULTI_TARGET_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { .start = NULL, .end = NULL } - }, - .lefts = { 0 }, - .rest = NULL, - .rights = { 0 }, - .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; + return pm_multi_target_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + ((pm_node_list_t) { 0 }), + NULL, + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }) + ); } /** @@ -5990,27 +5704,27 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t node->rest = target; } else { pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS); - pm_node_list_append(&node->rights, target); + pm_node_list_append(parser->arena, &node->rights, target); } } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) { if (node->rest == NULL) { node->rest = target; } else { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST); - pm_node_list_append(&node->rights, target); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST); + pm_node_list_append(parser->arena, &node->rights, target); } } else if (node->rest == NULL) { - pm_node_list_append(&node->lefts, target); + pm_node_list_append(parser->arena, &node->lefts, target); } else { - pm_node_list_append(&node->rights, target); + pm_node_list_append(parser->arena, &node->rights, target); } - if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) { - node->base.location.start = target->location.start; + if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) { + PM_NODE_START_SET_NODE(node, target); } - if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) { - node->base.location.end = target->location.end; + if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) { + PM_NODE_LENGTH_SET_NODE(node, target); } } @@ -6018,18 +5732,19 @@ pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t * Set the opening of a MultiTargetNode node. */ static void -pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) { - node->base.location.start = lparen->start; - node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen); +pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) { + PM_NODE_START_SET_TOKEN(parser, node, lparen); + PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen); + node->lparen_loc = TOK2LOC(parser, lparen); } /** * Set the closing of a MultiTargetNode node. */ static void -pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) { - node->base.location.end = rparen->end; - node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen); +pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen); + node->rparen_loc = TOK2LOC(parser, rparen); } /** @@ -6037,32 +5752,21 @@ pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t */ static pm_multi_write_node_t * pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) { - pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t); - - *node = (pm_multi_write_node_t) { - { - .type = PM_MULTI_WRITE_NODE, - .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = target->base.location.start, - .end = value->location.end - } - }, - .lefts = target->lefts, - .rest = target->rest, - .rights = target->rights, - .lparen_loc = target->lparen_loc, - .rparen_loc = target->rparen_loc, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - // Explicitly do not call pm_node_destroy here because we want to keep - // around all of the information within the MultiWriteNode node. - xfree(target); - - return node; + /* The target is no longer necessary because we have reused its children. It + * is arena-allocated so no explicit free is needed. */ + return pm_multi_write_node_new( + parser->arena, + ++parser->node_id, + pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY), + PM_LOCATION_INIT_NODES(target, value), + target->lefts, + target->rest, + target->rights, + target->lparen_loc, + target->rparen_loc, + TOK2LOC(parser, operator), + value + ); } /** @@ -6071,22 +5775,15 @@ pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, static pm_next_node_t * pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) { assert(keyword->type == PM_TOKEN_KEYWORD_NEXT); - pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t); - *node = (pm_next_node_t) { - { - .type = PM_NEXT_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = (arguments == NULL ? keyword->end : arguments->base.location.end) - } - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .arguments = arguments - }; - - return node; + return pm_next_node_new( + parser->arena, + ++parser->node_id, + 0, + (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments), + arguments, + TOK2LOC(parser, keyword) + ); } /** @@ -6095,16 +5792,31 @@ pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments static pm_nil_node_t * pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_NIL); - pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t); - *node = (pm_nil_node_t) {{ - .type = PM_NIL_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; + return pm_nil_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token) + ); +} - return node; +/** + * Allocate and initialize a new NoKeywordsParameterNode node. + */ +static pm_no_block_parameter_node_t * +pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) { + assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND); + assert(keyword->type == PM_TOKEN_KEYWORD_NIL); + + return pm_no_block_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, operator, keyword), + TOK2LOC(parser, operator), + TOK2LOC(parser, keyword) + ); } /** @@ -6114,41 +5826,29 @@ static pm_no_keywords_parameter_node_t * pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) { assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR); assert(keyword->type == PM_TOKEN_KEYWORD_NIL); - pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t); - *node = (pm_no_keywords_parameter_node_t) { - { - .type = PM_NO_KEYWORDS_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = keyword->end - } - }, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword) - }; - - return node; + return pm_no_keywords_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, operator, keyword), + TOK2LOC(parser, operator), + TOK2LOC(parser, keyword) + ); } /** * Allocate and initialize a new NumberedParametersNode node. */ static pm_numbered_parameters_node_t * -pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) { - pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t); - - *node = (pm_numbered_parameters_node_t) { - { - .type = PM_NUMBERED_PARAMETERS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = *location - }, - .maximum = maximum - }; - - return node; +pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) { + return pm_numbered_parameters_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + maximum + ); } /** @@ -6184,14 +5884,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to unsigned long value = strtoul(digits, &endptr, 10); if ((digits == endptr) || (*endptr != '\0')) { - pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL); + pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL); value = 0; } - xfree(digits); + xfree_sized(digits, sizeof(char) * (length + 1)); if ((errno == ERANGE) || (value > NTH_REF_MAX)) { - PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); + PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start); value = 0; } @@ -6206,18 +5906,14 @@ pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *to static pm_numbered_reference_read_node_t * pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) { assert(name->type == PM_TOKEN_NUMBERED_REFERENCE); - pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t); - - *node = (pm_numbered_reference_read_node_t) { - { - .type = PM_NUMBERED_REFERENCE_READ_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(name), - }, - .number = pm_numbered_reference_read_node_number(parser, name) - }; - return node; + return pm_numbered_reference_read_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, name), + pm_numbered_reference_read_node_number(parser, name) + ); } /** @@ -6225,24 +5921,16 @@ pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *na */ static pm_optional_parameter_node_t * pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) { - pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t); - - *node = (pm_optional_parameter_node_t) { - { - .type = PM_OPTIONAL_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = name->start, - .end = value->location.end - } - }, - .name = pm_parser_constant_id_token(parser, name), - .name_loc = PM_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .value = value - }; - - return node; + return pm_optional_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN_NODE(parser, name, value), + pm_parser_constant_id_token(parser, name), + TOK2LOC(parser, name), + TOK2LOC(parser, operator), + value + ); } /** @@ -6252,23 +5940,15 @@ static pm_or_node_t * pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) { pm_assert_value_expression(parser, left); - pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t); - - *node = (pm_or_node_t) { - { - .type = PM_OR_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = left->location.start, - .end = right->location.end - } - }, - .left = left, - .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_or_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(left, right), + left, + right, + TOK2LOC(parser, operator) + ); } /** @@ -6276,24 +5956,19 @@ pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operat */ static pm_parameters_node_t * pm_parameters_node_create(pm_parser_t *parser) { - pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t); - - *node = (pm_parameters_node_t) { - { - .type = PM_PARAMETERS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(&parser->current) - }, - .rest = NULL, - .keyword_rest = NULL, - .block = NULL, - .requireds = { 0 }, - .optionals = { 0 }, - .posts = { 0 }, - .keywords = { 0 } - }; - - return node; + return pm_parameters_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + ((pm_node_list_t) { 0 }), + ((pm_node_list_t) { 0 }), + NULL, + ((pm_node_list_t) { 0 }), + ((pm_node_list_t) { 0 }), + NULL, + NULL + ); } /** @@ -6301,16 +5976,12 @@ pm_parameters_node_create(pm_parser_t *parser) { */ static void pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) { - if (params->base.location.start == NULL) { - params->base.location.start = param->location.start; - } else { - params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start; + if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) { + PM_NODE_START_SET_NODE(params, param); } - if (params->base.location.end == NULL) { - params->base.location.end = param->location.end; - } else { - params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end; + if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) { + PM_NODE_LENGTH_SET_NODE(params, param); } } @@ -6318,27 +5989,27 @@ pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) * Append a required parameter to a ParametersNode node. */ static void -pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) { +pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) { pm_parameters_node_location_set(params, param); - pm_node_list_append(¶ms->requireds, param); + pm_node_list_append(arena, ¶ms->requireds, param); } /** * Append an optional parameter to a ParametersNode node. */ static void -pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) { - pm_parameters_node_location_set(params, (pm_node_t *) param); - pm_node_list_append(¶ms->optionals, (pm_node_t *) param); +pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) { + pm_parameters_node_location_set(params, UP(param)); + pm_node_list_append(arena, ¶ms->optionals, UP(param)); } /** * Append a post optional arguments parameter to a ParametersNode node. */ static void -pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) { +pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) { pm_parameters_node_location_set(params, param); - pm_node_list_append(¶ms->posts, param); + pm_node_list_append(arena, ¶ms->posts, param); } /** @@ -6354,9 +6025,9 @@ pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) { * Append a keyword parameter to a ParametersNode node. */ static void -pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) { +pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) { pm_parameters_node_location_set(params, param); - pm_node_list_append(¶ms->keywords, param); + pm_node_list_append(arena, ¶ms->keywords, param); } /** @@ -6373,9 +6044,9 @@ pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *par * Set the block parameter on a ParametersNode node. */ static void -pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) { +pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) { assert(params->block == NULL); - pm_parameters_node_location_set(params, (pm_node_t *) param); + pm_parameters_node_location_set(params, param); params->block = param; } @@ -6384,46 +6055,30 @@ pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_no */ static pm_program_node_t * pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) { - pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t); - - *node = (pm_program_node_t) { - { - .type = PM_PROGRAM_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = statements == NULL ? parser->start : statements->base.location.start, - .end = statements == NULL ? parser->end : statements->base.location.end - } - }, - .locals = *locals, - .statements = statements - }; - - return node; + return pm_program_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODE(statements), + *locals, + statements + ); } /** * Allocate and initialize new ParenthesesNode node. */ static pm_parentheses_node_t * -pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) { - pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t); - - *node = (pm_parentheses_node_t) { - { - .type = PM_PARENTHESES_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - } - }, - .body = body, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) - }; - - return node; +pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) { + return pm_parentheses_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + body, + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } /** @@ -6431,24 +6086,16 @@ pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_no */ static pm_pinned_expression_node_t * pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) { - pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t); - - *node = (pm_pinned_expression_node_t) { - { - .type = PM_PINNED_EXPRESSION_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = rparen->end - } - }, - .expression = expression, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen), - .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen) - }; - - return node; + return pm_pinned_expression_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, operator, rparen), + expression, + TOK2LOC(parser, operator), + TOK2LOC(parser, lparen), + TOK2LOC(parser, rparen) + ); } /** @@ -6456,22 +6103,14 @@ pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, con */ static pm_pinned_variable_node_t * pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) { - pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t); - - *node = (pm_pinned_variable_node_t) { - { - .type = PM_PINNED_VARIABLE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = variable->location.end - } - }, - .variable = variable, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_pinned_variable_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable), + variable, + TOK2LOC(parser, operator) + ); } /** @@ -6479,24 +6118,16 @@ pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, */ static pm_post_execution_node_t * pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) { - pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t); - - *node = (pm_post_execution_node_t) { - { - .type = PM_POST_EXECUTION_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = closing->end - } - }, - .statements = statements, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) - }; - - return node; + return pm_post_execution_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, keyword, closing), + statements, + TOK2LOC(parser, keyword), + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } /** @@ -6504,24 +6135,16 @@ pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, co */ static pm_pre_execution_node_t * pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) { - pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t); - - *node = (pm_pre_execution_node_t) { - { - .type = PM_PRE_EXECUTION_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = closing->end - } - }, - .statements = statements, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing) - }; - - return node; + return pm_pre_execution_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, keyword, closing), + statements, + TOK2LOC(parser, keyword), + TOK2LOC(parser, opening), + TOK2LOC(parser, closing) + ); } /** @@ -6531,8 +6154,6 @@ static pm_range_node_t * pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) { pm_assert_value_expression(parser, left); pm_assert_value_expression(parser, right); - - pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t); pm_node_flags_t flags = 0; // Indicate that this node is an exclusive range if the operator is `...`. @@ -6550,22 +6171,18 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope flags |= PM_NODE_FLAG_STATIC_LITERAL; } - *node = (pm_range_node_t) { - { - .type = PM_RANGE_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = (left == NULL ? operator->start : left->location.start), - .end = (right == NULL ? operator->end : right->location.end) - } - }, - .left = left, - .right = right, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; + uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left); + uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right); - return node; + return pm_range_node_new( + parser->arena, + ++parser->node_id, + flags, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + left, + right, + TOK2LOC(parser, operator) + ); } /** @@ -6574,15 +6191,13 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope static pm_redo_node_t * pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_REDO); - pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t); - *node = (pm_redo_node_t) {{ - .type = PM_REDO_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - - return node; + return pm_redo_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -6591,31 +6206,22 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_regular_expression_node_t * pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) { - pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t); - - *node = (pm_regular_expression_node_t) { - { - .type = PM_REGULAR_EXPRESSION_NODE, - .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = MIN(opening->start, closing->start), - .end = MAX(opening->end, closing->end) - } - }, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), - .unescaped = *unescaped - }; - - return node; + return pm_regular_expression_node_new( + parser->arena, + ++parser->node_id, + pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + TOK2LOC(parser, opening), + TOK2LOC(parser, content), + TOK2LOC(parser, closing), + *unescaped + ); } /** * Allocate a new initialize a new RegularExpressionNode node. */ -static inline pm_regular_expression_node_t * +static PRISM_INLINE pm_regular_expression_node_t * pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY); } @@ -6625,18 +6231,13 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening */ static pm_required_parameter_node_t * pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) { - pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t); - - *node = (pm_required_parameter_node_t) { - { - .type = PM_REQUIRED_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }, - .name = pm_parser_constant_id_token(parser, token) - }; - - return node; + return pm_required_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token), + pm_parser_constant_id_token(parser, token) + ); } /** @@ -6644,23 +6245,15 @@ pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) */ static pm_rescue_modifier_node_t * pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) { - pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t); - - *node = (pm_rescue_modifier_node_t) { - { - .type = PM_RESCUE_MODIFIER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = expression->location.start, - .end = rescue_expression->location.end - } - }, - .expression = expression, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .rescue_expression = rescue_expression - }; - - return node; + return pm_rescue_modifier_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_NODES(expression, rescue_expression), + expression, + TOK2LOC(parser, keyword), + rescue_expression + ); } /** @@ -6668,28 +6261,24 @@ pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const */ static pm_rescue_node_t * pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) { - pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t); - - *node = (pm_rescue_node_t) { - { - .type = PM_RESCUE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(keyword) - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .reference = NULL, - .statements = NULL, - .subsequent = NULL, - .exceptions = { 0 } - }; - - return node; + return pm_rescue_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, keyword), + TOK2LOC(parser, keyword), + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }), + NULL, + ((pm_location_t) { 0 }), + NULL, + NULL + ); } -static inline void -pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) { - node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator); +static PRISM_INLINE void +pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) { + node->operator_loc = TOK2LOC(parser, operator); } /** @@ -6698,7 +6287,7 @@ pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) static void pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) { node->reference = reference; - node->base.location.end = reference->location.end; + PM_NODE_LENGTH_SET_NODE(node, reference); } /** @@ -6708,7 +6297,7 @@ static void pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) { node->statements = statements; if (pm_statements_node_body_length(statements) > 0) { - node->base.location.end = statements->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, statements); } } @@ -6718,16 +6307,16 @@ pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *stat static void pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) { node->subsequent = subsequent; - node->base.location.end = subsequent->base.location.end; + PM_NODE_LENGTH_SET_NODE(node, subsequent); } /** * Append an exception node to a rescue node, and update the location. */ static void -pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) { - pm_node_list_append(&node->exceptions, exception); - node->base.location.end = exception->location.end; +pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) { + pm_node_list_append(arena, &node->exceptions, exception); + PM_NODE_LENGTH_SET_NODE(node, exception); } /** @@ -6735,23 +6324,15 @@ pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) { */ static pm_rest_parameter_node_t * pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) { - pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t); - - *node = (pm_rest_parameter_node_t) { - { - .type = PM_REST_PARAMETER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end) - } - }, - .name = pm_parser_optional_constant_id_token(parser, name), - .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator) - }; - - return node; + return pm_rest_parameter_node_new( + parser->arena, + ++parser->node_id, + 0, + (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name), + name == NULL ? 0 : pm_parser_constant_id_token(parser, name), + NTOK2LOC(parser, name), + TOK2LOC(parser, operator) + ); } /** @@ -6760,15 +6341,13 @@ pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, c static pm_retry_node_t * pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_RETRY); - pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t); - - *node = (pm_retry_node_t) {{ - .type = PM_RETRY_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - return node; + return pm_retry_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -6776,22 +6355,14 @@ pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_return_node_t * pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) { - pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t); - - *node = (pm_return_node_t) { - { - .type = PM_RETURN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = (arguments == NULL ? keyword->end : arguments->base.location.end) - } - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .arguments = arguments - }; - - return node; + return pm_return_node_new( + parser->arena, + ++parser->node_id, + 0, + (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments), + TOK2LOC(parser, keyword), + arguments + ); } /** @@ -6800,15 +6371,13 @@ pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argumen static pm_self_node_t * pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_SELF); - pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t); - - *node = (pm_self_node_t) {{ - .type = PM_SELF_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - return node; + return pm_self_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -6816,19 +6385,13 @@ pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_shareable_constant_node_t * pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) { - pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t); - - *node = (pm_shareable_constant_node_t) { - { - .type = PM_SHAREABLE_CONSTANT_NODE, - .flags = (pm_node_flags_t) value, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NODE_VALUE(write) - }, - .write = write - }; - - return node; + return pm_shareable_constant_node_new( + parser->arena, + ++parser->node_id, + (pm_node_flags_t) value, + PM_LOCATION_INIT_NODE(write), + write + ); } /** @@ -6836,26 +6399,18 @@ pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shar */ static pm_singleton_class_node_t * pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) { - pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t); - - *node = (pm_singleton_class_node_t) { - { - .type = PM_SINGLETON_CLASS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = class_keyword->start, - .end = end_keyword->end - } - }, - .locals = *locals, - .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword), - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .expression = expression, - .body = body, - .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword) - }; - - return node; + return pm_singleton_class_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword), + *locals, + TOK2LOC(parser, class_keyword), + TOK2LOC(parser, operator), + expression, + body, + TOK2LOC(parser, end_keyword) + ); } /** @@ -6864,16 +6419,13 @@ pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *local static pm_source_encoding_node_t * pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD___ENCODING__); - pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t); - - *node = (pm_source_encoding_node_t) {{ - .type = PM_SOURCE_ENCODING_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - return node; + return pm_source_encoding_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -6881,7 +6433,6 @@ pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_source_file_node_t* pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) { - pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t); assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__); pm_node_flags_t flags = 0; @@ -6895,17 +6446,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) break; } - *node = (pm_source_file_node_t) { - { - .type = PM_SOURCE_FILE_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(file_keyword), - }, - .filepath = parser->filepath - }; - - return node; + return pm_source_file_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_TOKEN(parser, file_keyword), + parser->filepath + ); } /** @@ -6914,16 +6461,13 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) static pm_source_line_node_t * pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD___LINE__); - pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t); - - *node = (pm_source_line_node_t) {{ - .type = PM_SOURCE_LINE_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - return node; + return pm_source_line_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -6931,22 +6475,14 @@ pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_splat_node_t * pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) { - pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t); - - *node = (pm_splat_node_t) { - { - .type = PM_SPLAT_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = operator->start, - .end = (expression == NULL ? operator->end : expression->location.end) - } - }, - .operator_loc = PM_LOCATION_TOKEN_VALUE(operator), - .expression = expression - }; - - return node; + return pm_splat_node_new( + parser->arena, + ++parser->node_id, + 0, + (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression), + TOK2LOC(parser, operator), + expression + ); } /** @@ -6954,18 +6490,13 @@ pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t */ static pm_statements_node_t * pm_statements_node_create(pm_parser_t *parser) { - pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t); - - *node = (pm_statements_node_t) { - { - .type = PM_STATEMENTS_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser) - }, - .body = { 0 } - }; - - return node; + return pm_statements_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + ((pm_node_list_t) { 0 }) + ); } /** @@ -6977,25 +6508,17 @@ pm_statements_node_body_length(pm_statements_node_t *node) { } /** - * Set the location of the given StatementsNode. - */ -static void -pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) { - node->base.location = (pm_location_t) { .start = start, .end = end }; -} - -/** * Update the location of the statements node based on the statement that is * being added to the list. */ -static inline void +static PRISM_INLINE void pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) { - if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) { - node->base.location.start = statement->location.start; + if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) { + PM_NODE_START_SET_NODE(node, statement); } - if (statement->location.end > node->base.location.end) { - node->base.location.end = statement->location.end; + if (PM_NODE_END(statement) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, statement); } } @@ -7022,7 +6545,7 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, } } - pm_node_list_append(&node->body, statement); + pm_node_list_append(parser->arena, &node->body, statement); if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE); } @@ -7030,18 +6553,17 @@ pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, * Prepend a new node to the given StatementsNode node's body. */ static void -pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) { +pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) { pm_statements_node_body_update(node, statement); - pm_node_list_prepend(&node->body, statement); + pm_node_list_prepend(arena, &node->body, statement); pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE); } /** * Allocate a new StringNode node with the current string on the parser. */ -static inline pm_string_node_t * +static PRISM_INLINE pm_string_node_t * pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) { - pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t); pm_node_flags_t flags = 0; switch (parser->frozen_string_literal) { @@ -7053,23 +6575,19 @@ pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, break; } - *node = (pm_string_node_t) { - { - .type = PM_STRING_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start), - .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end) - } - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .unescaped = *string - }; + uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening); + uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing); - return node; + return pm_string_node_new( + parser->arena, + ++parser->node_id, + flags, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + NTOK2LOC(parser, opening), + TOK2LOC(parser, content), + NTOK2LOC(parser, closing), + *string + ); } /** @@ -7097,30 +6615,21 @@ pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *open static pm_super_node_t * pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) { assert(keyword->type == PM_TOKEN_KEYWORD_SUPER); - pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t); - const uint8_t *end = pm_arguments_end(arguments); - if (end == NULL) { - assert(false && "unreachable"); - } - - *node = (pm_super_node_t) { - { - .type = PM_SUPER_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = end, - } - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .lparen_loc = arguments->opening_loc, - .arguments = arguments->arguments, - .rparen_loc = arguments->closing_loc, - .block = arguments->block - }; - - return node; + const pm_location_t *end = pm_arguments_end(arguments); + assert(end != NULL && "unreachable"); + + return pm_super_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }), + TOK2LOC(parser, keyword), + arguments->opening_loc, + arguments->arguments, + arguments->closing_loc, + arguments->block + ); } /** @@ -7148,7 +6657,7 @@ parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *locat size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor); if (width == 0) { - pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL); + pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL); break; } @@ -7168,7 +6677,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca size_t width = encoding->char_width(cursor, end - cursor); if (width == 0) { - pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL); + pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL); break; } @@ -7185,7 +6694,7 @@ parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *loca * If the validate flag is set, then it will check the contents of the symbol * to ensure that all characters are valid in the encoding. */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) { if (parser->explicit_encoding != NULL) { // A Symbol may optionally have its encoding explicitly set. This will @@ -7210,160 +6719,31 @@ parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_ return 0; } -static pm_node_flags_t -parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) { - assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) || - (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) || - (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) || - (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY)); - - // There's special validation logic used if a string does not contain any character escape sequences. - if (parser->explicit_encoding == NULL) { - // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp - // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to - // the US-ASCII encoding. - if (ascii_only) { - return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags; - } - - if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { - if (!ascii_only) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); - } - } else if (parser->encoding != modifier_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name); - - if (modifier == 'n' && !ascii_only) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source)); - } - } - - return flags; - } - - // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile. - bool mixed_encoding = false; - - if (mixed_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); - } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) { - // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily. - bool valid_string_in_modifier_encoding = true; - - if (!valid_string_in_modifier_encoding) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source)); - } - } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { - // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now. - if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source)); - } - } - - // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else. - return flags; -} - -/** - * Ruby "downgrades" the encoding of Regexps to US-ASCII if the associated encoding is ASCII-compatible and - * the unescaped representation of a Regexp source consists only of US-ASCII code points. This is true even - * when the Regexp is explicitly given an ASCII-8BIT encoding via the (/n) modifier. Otherwise, the encoding - * may be explicitly set with an escape sequence. - */ -static pm_node_flags_t -parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) { - // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report. - bool valid_unicode_range = true; - if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source)); - return flags; - } - - // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding - // to multi-byte characters are allowed. - if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) { - // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the - // following error message appearing twice. We do the same for compatibility. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name); - } - - /** - * Start checking modifier flags. We need to process these before considering any explicit encodings that may have - * been set by character literals. The order in which the encoding modifiers is checked does not matter. In the - * event that both an encoding modifier and an explicit encoding would result in the same encoding we do not set - * the corresponding "forced_<encoding>" flag. Instead, the caller should check the encoding modifier flag and - * determine the encoding that way. - */ - - if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) { - return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY); - } - - if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) { - return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY); - } - - if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) { - return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY); - } - - if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) { - return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY); - } - - // At this point no encoding modifiers will be present on the regular expression as they would have already - // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all - // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII. - if (ascii_only) { - return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING; - } - - // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string - // or by specifying a modifier. - // - // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points. - if (parser->explicit_encoding != NULL) { - if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { - return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING; - } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { - return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING; - } - } - - return 0; -} - /** * Allocate and initialize a new SymbolNode node with the given unescaped * string. */ static pm_symbol_node_t * pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) { - pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t); - - *node = (pm_symbol_node_t) { - { - .type = PM_SYMBOL_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL | flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start), - .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end) - } - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .value_loc = PM_LOCATION_TOKEN_VALUE(value), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .unescaped = *unescaped - }; - - return node; + uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening); + uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing); + + return pm_symbol_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL | flags, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + NTOK2LOC(parser, opening), + NTOK2LOC(parser, value), + NTOK2LOC(parser, closing), + *unescaped + ); } /** * Allocate and initialize a new SymbolNode node. */ -static inline pm_symbol_node_t * +static PRISM_INLINE pm_symbol_node_t * pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) { return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0); } @@ -7383,35 +6763,15 @@ pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *open */ static pm_symbol_node_t * pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) { - pm_symbol_node_t *node; + assert(token->type == PM_TOKEN_LABEL); - switch (token->type) { - case PM_TOKEN_LABEL: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; - - pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; - node = pm_symbol_node_create(parser, &opening, &label, &closing); - - assert((label.end - label.start) >= 0); - pm_string_shared_init(&node->unescaped, label.start, label.end); - pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false)); + pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; + pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; + pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing); - break; - } - case PM_TOKEN_MISSING: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end }; - node = pm_symbol_node_create(parser, &opening, &label, &closing); - break; - } - default: - assert(false && "unreachable"); - node = NULL; - break; - } + assert((label.end - label.start) >= 0); + pm_string_shared_init(&node->unescaped, label.start, label.end); + pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false)); return node; } @@ -7421,18 +6781,16 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_symbol_node_t * pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) { - pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t); - - *node = (pm_symbol_node_t) { - { - .type = PM_SYMBOL_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser) - }, - .value_loc = PM_LOCATION_NULL_VALUE(parser), - .unescaped = { 0 } - }; + pm_symbol_node_t *node = pm_symbol_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING, + PM_LOCATION_INIT_UNSET, + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_string_t) { 0 }) + ); pm_string_constant_init(&node->unescaped, content, strlen(content)); return node; @@ -7442,21 +6800,29 @@ pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) { * Check if the given node is a label in a hash. */ static bool -pm_symbol_node_label_p(pm_node_t *node) { - const uint8_t *end = NULL; +pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) { + const pm_location_t *location = NULL; switch (PM_NODE_TYPE(node)) { - case PM_SYMBOL_NODE: - end = ((pm_symbol_node_t *) node)->closing_loc.end; + case PM_SYMBOL_NODE: { + const pm_symbol_node_t *cast = (pm_symbol_node_t *) node; + if (cast->closing_loc.length > 0) { + location = &cast->closing_loc; + } break; - case PM_INTERPOLATED_SYMBOL_NODE: - end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end; + } + case PM_INTERPOLATED_SYMBOL_NODE: { + const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node; + if (cast->closing_loc.length > 0) { + location = &cast->closing_loc; + } break; + } default: return false; } - return (end != NULL) && (end[-1] == ':'); + return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':'); } /** @@ -7464,32 +6830,26 @@ pm_symbol_node_label_p(pm_node_t *node) { */ static pm_symbol_node_t * pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) { - pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t); + pm_symbol_node_t *new_node = pm_symbol_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + TOK2LOC(parser, opening), + node->content_loc, + TOK2LOC(parser, closing), + node->unescaped + ); - *new_node = (pm_symbol_node_t) { - { - .type = PM_SYMBOL_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - } - }, - .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening), - .value_loc = node->content_loc, - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .unescaped = node->unescaped + pm_token_t content = { + .type = PM_TOKEN_IDENTIFIER, + .start = parser->start + node->content_loc.start, + .end = parser->start + node->content_loc.start + node->content_loc.length }; - pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end }; - pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true)); - - // We are explicitly _not_ using pm_node_destroy here because we don't want - // to trash the unescaped string. We could instead copy the string if we - // know that it is owned, but we're taking the fast path for now. - xfree(node); + pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true)); + /* The old node is arena-allocated so no explicit free is needed. */ return new_node; } @@ -7498,7 +6858,6 @@ pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const */ static pm_string_node_t * pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) { - pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t); pm_node_flags_t flags = 0; switch (parser->frozen_string_literal) { @@ -7510,24 +6869,18 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) { break; } - *new_node = (pm_string_node_t) { - { - .type = PM_STRING_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = node->base.location - }, - .opening_loc = node->opening_loc, - .content_loc = node->value_loc, - .closing_loc = node->closing_loc, - .unescaped = node->unescaped - }; - - // We are explicitly _not_ using pm_node_destroy here because we don't want - // to trash the unescaped string. We could instead copy the string if we - // know that it is owned, but we're taking the fast path for now. - xfree(node); + pm_string_node_t *new_node = pm_string_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_NODE(node), + node->opening_loc, + node->value_loc, + node->closing_loc, + node->unescaped + ); + /* The old node is arena-allocated so no explicit free is needed. */ return new_node; } @@ -7537,16 +6890,13 @@ pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) { static pm_true_node_t * pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_TRUE); - pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t); - *node = (pm_true_node_t) {{ - .type = PM_TRUE_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token) - }}; - - return node; + return pm_true_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_TOKEN(parser, token) + ); } /** @@ -7554,16 +6904,12 @@ pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) { */ static pm_true_node_t * pm_true_node_synthesized_create(pm_parser_t *parser) { - pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t); - - *node = (pm_true_node_t) {{ - .type = PM_TRUE_NODE, - .flags = PM_NODE_FLAG_STATIC_LITERAL, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { .start = parser->start, .end = parser->end } - }}; - - return node; + return pm_true_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_STATIC_LITERAL, + PM_LOCATION_INIT_UNSET + ); } /** @@ -7572,28 +6918,24 @@ pm_true_node_synthesized_create(pm_parser_t *parser) { static pm_undef_node_t * pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) { assert(token->type == PM_TOKEN_KEYWORD_UNDEF); - pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t); - - *node = (pm_undef_node_t) { - { - .type = PM_UNDEF_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_TOKEN_VALUE(token), - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(token), - .names = { 0 } - }; - return node; + return pm_undef_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, token), + ((pm_node_list_t) { 0 }), + TOK2LOC(parser, token) + ); } /** * Append a name to an undef node. */ static void -pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) { - node->base.location.end = name->location.end; - pm_node_list_append(&node->names, name); +pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) { + PM_NODE_LENGTH_SET_NODE(node, name); + pm_node_list_append(arena, &node->names, name); } /** @@ -7602,34 +6944,20 @@ pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) { static pm_unless_node_t * pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) { pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); - pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t); - - const uint8_t *end; - if (statements != NULL) { - end = statements->base.location.end; - } else { - end = predicate->location.end; - } - - *node = (pm_unless_node_t) { - { - .type = PM_UNLESS_NODE, - .flags = PM_NODE_FLAG_NEWLINE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = end - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword), - .statements = statements, - .else_clause = NULL, - .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; + pm_node_t *end = statements == NULL ? predicate : UP(statements); + + return pm_unless_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_NEWLINE, + PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end), + TOK2LOC(parser, keyword), + predicate, + NTOK2LOC(parser, then_keyword), + statements, + NULL, + ((pm_location_t) { 0 }) + ); } /** @@ -7638,36 +6966,28 @@ pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t static pm_unless_node_t * pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) { pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); - pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t); pm_statements_node_t *statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, statements, statement, true); - *node = (pm_unless_node_t) { - { - .type = PM_UNLESS_NODE, - .flags = PM_NODE_FLAG_NEWLINE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = statement->location.start, - .end = predicate->location.end - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword), - .predicate = predicate, - .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .statements = statements, - .else_clause = NULL, - .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE - }; - - return node; + return pm_unless_node_new( + parser->arena, + ++parser->node_id, + PM_NODE_FLAG_NEWLINE, + PM_LOCATION_INIT_NODES(statement, predicate), + TOK2LOC(parser, unless_keyword), + predicate, + ((pm_location_t) { 0 }), + statements, + NULL, + ((pm_location_t) { 0 }) + ); } -static inline void -pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) { - node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword); - node->base.location.end = end_keyword->end; +static PRISM_INLINE void +pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) { + node->end_keyword_loc = TOK2LOC(parser, end_keyword); + PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword); } /** @@ -7682,7 +7002,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen // All of the block exits that we want to remove should be within the // statements, and since we are modifying the statements, we shouldn't have // to check the end location. - const uint8_t *start = statements->base.location.start; + uint32_t start = statements->base.location.start; for (size_t index = parser->current_block_exits->size; index > 0; index--) { pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1]; @@ -7698,27 +7018,19 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen */ static pm_until_node_t * pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) { - pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t); pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); - *node = (pm_until_node_t) { - { - .type = PM_UNTIL_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = closing->end, - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .predicate = predicate, - .statements = statements - }; - - return node; + return pm_until_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_TOKENS(parser, keyword, closing), + TOK2LOC(parser, keyword), + NTOK2LOC(parser, do_keyword), + TOK2LOC(parser, closing), + predicate, + statements + ); } /** @@ -7726,28 +7038,20 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to */ static pm_until_node_t * pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) { - pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t); pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); pm_loop_modifier_block_exits(parser, statements); - *node = (pm_until_node_t) { - { - .type = PM_UNTIL_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = statements->base.location.start, - .end = predicate->location.end, - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .predicate = predicate, - .statements = statements - }; - - return node; + return pm_until_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_NODES(statements, predicate), + TOK2LOC(parser, keyword), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + predicate, + statements + ); } /** @@ -7755,42 +7059,34 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm */ static pm_when_node_t * pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) { - pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t); - - *node = (pm_when_node_t) { - { - .type = PM_WHEN_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = NULL - } - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .statements = NULL, - .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .conditions = { 0 } - }; - - return node; + return pm_when_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_TOKEN(parser, keyword), + TOK2LOC(parser, keyword), + ((pm_node_list_t) { 0 }), + ((pm_location_t) { 0 }), + NULL + ); } /** * Append a new condition to a when node. */ static void -pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) { - node->base.location.end = condition->location.end; - pm_node_list_append(&node->conditions, condition); +pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) { + PM_NODE_LENGTH_SET_NODE(node, condition); + pm_node_list_append(arena, &node->conditions, condition); } /** * Set the location of the then keyword of a when node. */ -static inline void -pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) { - node->base.location.end = then_keyword->end; - node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword); +static PRISM_INLINE void +pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) { + PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword); + node->then_keyword_loc = TOK2LOC(parser, then_keyword); } /** @@ -7798,8 +7094,8 @@ pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_k */ static void pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) { - if (statements->base.location.end > node->base.location.end) { - node->base.location.end = statements->base.location.end; + if (PM_NODE_END(statements) > PM_NODE_END(node)) { + PM_NODE_LENGTH_SET_NODE(node, statements); } node->statements = statements; @@ -7810,27 +7106,19 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen */ static pm_while_node_t * pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) { - pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t); pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); - *node = (pm_while_node_t) { - { - .type = PM_WHILE_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = closing->end - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), - .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing), - .predicate = predicate, - .statements = statements - }; - - return node; + return pm_while_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_TOKENS(parser, keyword, closing), + TOK2LOC(parser, keyword), + NTOK2LOC(parser, do_keyword), + TOK2LOC(parser, closing), + predicate, + statements + ); } /** @@ -7838,28 +7126,20 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to */ static pm_while_node_t * pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) { - pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t); pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL); pm_loop_modifier_block_exits(parser, statements); - *node = (pm_while_node_t) { - { - .type = PM_WHILE_NODE, - .flags = flags, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = statements->base.location.start, - .end = predicate->location.end - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, - .predicate = predicate, - .statements = statements - }; - - return node; + return pm_while_node_new( + parser->arena, + ++parser->node_id, + flags, + PM_LOCATION_INIT_NODES(statements, predicate), + TOK2LOC(parser, keyword), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + predicate, + statements + ); } /** @@ -7867,22 +7147,17 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm */ static pm_while_node_t * pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) { - pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t); - - *node = (pm_while_node_t) { - { - .type = PM_WHILE_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = PM_LOCATION_NULL_VALUE(parser) - }, - .keyword_loc = PM_LOCATION_NULL_VALUE(parser), - .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser), - .closing_loc = PM_LOCATION_NULL_VALUE(parser), - .predicate = predicate, - .statements = statements - }; - - return node; + return pm_while_node_new( + parser->arena, + ++parser->node_id, + 0, + PM_LOCATION_INIT_UNSET, + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + ((pm_location_t) { 0 }), + predicate, + statements + ); } /** @@ -7891,31 +7166,22 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s */ static pm_x_string_node_t * pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) { - pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t); - - *node = (pm_x_string_node_t) { - { - .type = PM_X_STRING_NODE, - .flags = PM_STRING_FLAGS_FROZEN, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = opening->start, - .end = closing->end - }, - }, - .opening_loc = PM_LOCATION_TOKEN_VALUE(opening), - .content_loc = PM_LOCATION_TOKEN_VALUE(content), - .closing_loc = PM_LOCATION_TOKEN_VALUE(closing), - .unescaped = *unescaped - }; - - return node; + return pm_x_string_node_new( + parser->arena, + ++parser->node_id, + PM_STRING_FLAGS_FROZEN, + PM_LOCATION_INIT_TOKENS(parser, opening, closing), + TOK2LOC(parser, opening), + TOK2LOC(parser, content), + TOK2LOC(parser, closing), + *unescaped + ); } /** * Allocate and initialize a new XStringNode node. */ -static inline pm_x_string_node_t * +static PRISM_INLINE pm_x_string_node_t * pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) { return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY); } @@ -7925,40 +7191,31 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_ */ static pm_yield_node_t * pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) { - pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t); + uint32_t start = PM_TOKEN_START(parser, keyword); + uint32_t end; - const uint8_t *end; - if (rparen_loc->start != NULL) { - end = rparen_loc->end; + if (rparen_loc->length > 0) { + end = PM_LOCATION_END(rparen_loc); } else if (arguments != NULL) { - end = arguments->base.location.end; - } else if (lparen_loc->start != NULL) { - end = lparen_loc->end; + end = PM_NODE_END(arguments); + } else if (lparen_loc->length > 0) { + end = PM_LOCATION_END(lparen_loc); } else { - end = keyword->end; - } - - *node = (pm_yield_node_t) { - { - .type = PM_YIELD_NODE, - .node_id = PM_NODE_IDENTIFY(parser), - .location = { - .start = keyword->start, - .end = end - }, - }, - .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword), - .lparen_loc = *lparen_loc, - .arguments = arguments, - .rparen_loc = *rparen_loc - }; - - return node; + end = PM_TOKEN_END(parser, keyword); + } + + return pm_yield_node_new( + parser->arena, + ++parser->node_id, + 0, + ((pm_location_t) { .start = start, .length = U32(end - start) }), + TOK2LOC(parser, keyword), + *lparen_loc, + arguments, + *rparen_loc + ); } -#undef PM_NODE_ALLOC -#undef PM_NODE_IDENTIFY - /** * Check if any of the currently visible scopes contain a local variable * described by the given constant id. @@ -7984,7 +7241,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant * described by the given token. This function implicitly inserts a constant * into the constant pool. */ -static inline int +static PRISM_INLINE int pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token)); } @@ -7992,27 +7249,35 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) { /** * Add a constant id to the local table of the current scope. */ -static inline void +static PRISM_INLINE void pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) { - pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads); + pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads); } /** * Add a local variable from a location to the current scope. */ static pm_constant_id_t -pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) { - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end); +pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) { + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end); if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads); return constant_id; } /** + * Add a local variable from a location to the current scope. + */ +static PRISM_INLINE pm_constant_id_t +pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) { + return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads); +} + +/** * Add a local variable from a token to the current scope. */ -static inline pm_constant_id_t +static PRISM_INLINE pm_constant_id_t pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) { - return pm_parser_local_add_location(parser, token->start, token->end, reads); + return pm_parser_local_add_raw(parser, token->start, token->end, reads); } /** @@ -8046,7 +7311,7 @@ static bool pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) { // We want to check whether the parameter name is a numbered parameter or // not. - pm_refute_numbered_parameter(parser, name->start, name->end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name)); // Otherwise we'll fetch the constant id for the parameter name and check // whether it's already in the current scope. @@ -8070,8 +7335,7 @@ pm_parser_scope_pop(pm_parser_t *parser) { pm_scope_t *scope = parser->current_scope; parser->current_scope = scope->previous; pm_locals_free(&scope->locals); - pm_node_list_free(&scope->implicit_parameters); - xfree(scope); + xfree_sized(scope, sizeof(pm_scope_t)); } /******************************************************************************/ @@ -8081,7 +7345,7 @@ pm_parser_scope_pop(pm_parser_t *parser) { /** * Pushes a value onto the stack. */ -static inline void +static PRISM_INLINE void pm_state_stack_push(pm_state_stack_t *stack, bool value) { *stack = (*stack << 1) | (value & 1); } @@ -8089,7 +7353,7 @@ pm_state_stack_push(pm_state_stack_t *stack, bool value) { /** * Pops a value off the stack. */ -static inline void +static PRISM_INLINE void pm_state_stack_pop(pm_state_stack_t *stack) { *stack >>= 1; } @@ -8097,38 +7361,38 @@ pm_state_stack_pop(pm_state_stack_t *stack) { /** * Returns the value at the top of the stack. */ -static inline bool +static PRISM_INLINE bool pm_state_stack_p(const pm_state_stack_t *stack) { return *stack & 1; } -static inline void +static PRISM_INLINE void pm_accepts_block_stack_push(pm_parser_t *parser, bool value) { // Use the negation of the value to prevent stack overflow. pm_state_stack_push(&parser->accepts_block_stack, !value); } -static inline void +static PRISM_INLINE void pm_accepts_block_stack_pop(pm_parser_t *parser) { pm_state_stack_pop(&parser->accepts_block_stack); } -static inline bool +static PRISM_INLINE bool pm_accepts_block_stack_p(pm_parser_t *parser) { return !pm_state_stack_p(&parser->accepts_block_stack); } -static inline void +static PRISM_INLINE void pm_do_loop_stack_push(pm_parser_t *parser, bool value) { pm_state_stack_push(&parser->do_loop_stack, value); } -static inline void +static PRISM_INLINE void pm_do_loop_stack_pop(pm_parser_t *parser) { pm_state_stack_pop(&parser->do_loop_stack); } -static inline bool +static PRISM_INLINE bool pm_do_loop_stack_p(pm_parser_t *parser) { return pm_state_stack_p(&parser->do_loop_stack); } @@ -8141,7 +7405,7 @@ pm_do_loop_stack_p(pm_parser_t *parser) { * Get the next character in the source starting from +cursor+. If that position * is beyond the end of the source then return '\0'. */ -static inline uint8_t +static PRISM_INLINE uint8_t peek_at(const pm_parser_t *parser, const uint8_t *cursor) { if (cursor < parser->end) { return *cursor; @@ -8155,7 +7419,7 @@ peek_at(const pm_parser_t *parser, const uint8_t *cursor) { * adding the given offset. If that position is beyond the end of the source * then return '\0'. */ -static inline uint8_t +static PRISM_INLINE uint8_t peek_offset(pm_parser_t *parser, ptrdiff_t offset) { return peek_at(parser, parser->current.end + offset); } @@ -8164,7 +7428,7 @@ peek_offset(pm_parser_t *parser, ptrdiff_t offset) { * Get the next character in the source starting from parser->current.end. If * that position is beyond the end of the source then return '\0'. */ -static inline uint8_t +static PRISM_INLINE uint8_t peek(const pm_parser_t *parser) { return peek_at(parser, parser->current.end); } @@ -8173,7 +7437,7 @@ peek(const pm_parser_t *parser) { * If the character to be read matches the given value, then returns true and * advances the current pointer. */ -static inline bool +static PRISM_INLINE bool match(pm_parser_t *parser, uint8_t value) { if (peek(parser) == value) { parser->current.end++; @@ -8186,7 +7450,7 @@ match(pm_parser_t *parser, uint8_t value) { * Return the length of the line ending string starting at +cursor+, or 0 if it * is not a line ending. This function is intended to be CRLF/LF agnostic. */ -static inline size_t +static PRISM_INLINE size_t match_eol_at(pm_parser_t *parser, const uint8_t *cursor) { if (peek_at(parser, cursor) == '\n') { return 1; @@ -8202,7 +7466,7 @@ match_eol_at(pm_parser_t *parser, const uint8_t *cursor) { * `parser->current.end + offset`, or 0 if it is not a line ending. This * function is intended to be CRLF/LF agnostic. */ -static inline size_t +static PRISM_INLINE size_t match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) { return match_eol_at(parser, parser->current.end + offset); } @@ -8212,7 +7476,7 @@ match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) { * or 0 if it is not a line ending. This function is intended to be CRLF/LF * agnostic. */ -static inline size_t +static PRISM_INLINE size_t match_eol(pm_parser_t *parser) { return match_eol_at(parser, parser->current.end); } @@ -8220,7 +7484,7 @@ match_eol(pm_parser_t *parser) { /** * Skip to the next newline character or NUL byte. */ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * next_newline(const uint8_t *cursor, ptrdiff_t length) { assert(length >= 0); @@ -8233,7 +7497,7 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) { /** * This is equivalent to the predicate of warn_balanced in CRuby. */ -static inline bool +static PRISM_INLINE bool ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) { return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser)); } @@ -8311,7 +7575,7 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) { // issue because we didn't understand the encoding that the user was // trying to use. In this case we'll keep using the default encoding but // add an error to the parser to indicate an unsuccessful parse. - pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); + pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); } } @@ -8336,7 +7600,7 @@ parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t valu } } -static inline bool +static PRISM_INLINE bool pm_char_is_magic_comment_key_delimiter(const uint8_t b) { return b == '\'' || b == '"' || b == ':' || b == ';'; } @@ -8346,13 +7610,15 @@ pm_char_is_magic_comment_key_delimiter(const uint8_t b) { * found, it returns a pointer to the start of the marker. Otherwise it returns * NULL. */ -static inline const uint8_t * +static PRISM_INLINE const uint8_t * parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) { - while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) { - if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') { - return cursor; + // Scan for '*' as the middle character, since it is rarer than '-' in + // typical comments and avoids repeated memchr calls for '-' that hit + // dashes in words like "foo-bar". + while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) { + if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') { + return cursor - 1; } - cursor++; } return NULL; } @@ -8367,7 +7633,7 @@ parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor * It returns true if it consumes the entire comment. Otherwise it returns * false. */ -static inline bool +static PRISM_INLINE bool parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { bool result = true; @@ -8389,11 +7655,24 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // have a magic comment. return false; } + } else { + // Non-emacs magic comments must contain a colon for `key: value`. + // Reject early if there is no colon to avoid scanning the entire + // comment character-by-character. + if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) { + return false; + } + + // Advance start past leading whitespace so the main loop begins + // directly at the key, avoiding a redundant whitespace scan. + start += pm_strspn_whitespace(start, end - start); } cursor = start; while (cursor < end) { - while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++; + if (indicator) { + while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++; + } const uint8_t *key_start = cursor; while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++; @@ -8421,7 +7700,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { if (*cursor == '\\' && (cursor + 1 < end)) cursor++; } value_end = cursor; - if (*cursor == '"') cursor++; + if (cursor < end && *cursor == '"') cursor++; } else { value_start = cursor; while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++; @@ -8479,7 +7758,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID: PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -8506,7 +7785,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID: PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -8541,7 +7820,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { } else { PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_MAGIC_COMMENT_VALUE, (int) key_length, (const char *) key_source, @@ -8554,17 +7833,14 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { // When we're done, we want to free the string in case we had to // allocate memory for it. - pm_string_free(&key); + pm_string_cleanup(&key); // Allocate a new magic comment node to append to the parser's list. - pm_magic_comment_t *magic_comment; - if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) { - magic_comment->key_start = key_start; - magic_comment->value_start = value_start; - magic_comment->key_length = (uint32_t) key_length; - magic_comment->value_length = value_length; - pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); - } + pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t)); + magic_comment->node.next = NULL; + magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) }; + magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length }; + pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment); } return result; @@ -8574,85 +7850,67 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) { /* Context manipulations */ /******************************************************************************/ -static bool -context_terminator(pm_context_t context, pm_token_t *token) { - switch (context) { - case PM_CONTEXT_MAIN: - case PM_CONTEXT_DEF_PARAMS: - case PM_CONTEXT_DEFINED: - case PM_CONTEXT_MULTI_TARGET: - case PM_CONTEXT_TERNARY: - case PM_CONTEXT_RESCUE_MODIFIER: - return token->type == PM_TOKEN_EOF; - case PM_CONTEXT_DEFAULT_PARAMS: - return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT; - case PM_CONTEXT_PREEXE: - case PM_CONTEXT_POSTEXE: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_MODULE: - case PM_CONTEXT_CLASS: - case PM_CONTEXT_SCLASS: - case PM_CONTEXT_LAMBDA_DO_END: - case PM_CONTEXT_DEF: - case PM_CONTEXT_BLOCK_KEYWORDS: - return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE; - case PM_CONTEXT_WHILE: - case PM_CONTEXT_UNTIL: - case PM_CONTEXT_ELSE: - case PM_CONTEXT_FOR: - case PM_CONTEXT_BEGIN_ENSURE: - case PM_CONTEXT_BLOCK_ENSURE: - case PM_CONTEXT_CLASS_ENSURE: - case PM_CONTEXT_DEF_ENSURE: - case PM_CONTEXT_LAMBDA_ENSURE: - case PM_CONTEXT_MODULE_ENSURE: - case PM_CONTEXT_SCLASS_ENSURE: - return token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_LOOP_PREDICATE: - return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN; - case PM_CONTEXT_FOR_INDEX: - return token->type == PM_TOKEN_KEYWORD_IN; - case PM_CONTEXT_CASE_WHEN: - return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE; - case PM_CONTEXT_CASE_IN: - return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE; - case PM_CONTEXT_IF: - case PM_CONTEXT_ELSIF: - return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_UNLESS: - return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_EMBEXPR: - return token->type == PM_TOKEN_EMBEXPR_END; - case PM_CONTEXT_BLOCK_BRACES: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_PARENS: - return token->type == PM_TOKEN_PARENTHESIS_RIGHT; - case PM_CONTEXT_BEGIN: - case PM_CONTEXT_BEGIN_RESCUE: - case PM_CONTEXT_BLOCK_RESCUE: - case PM_CONTEXT_CLASS_RESCUE: - case PM_CONTEXT_DEF_RESCUE: - case PM_CONTEXT_LAMBDA_RESCUE: - case PM_CONTEXT_MODULE_RESCUE: - case PM_CONTEXT_SCLASS_RESCUE: - return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_BEGIN_ELSE: - case PM_CONTEXT_BLOCK_ELSE: - case PM_CONTEXT_CLASS_ELSE: - case PM_CONTEXT_DEF_ELSE: - case PM_CONTEXT_LAMBDA_ELSE: - case PM_CONTEXT_MODULE_ELSE: - case PM_CONTEXT_SCLASS_ELSE: - return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END; - case PM_CONTEXT_LAMBDA_BRACES: - return token->type == PM_TOKEN_BRACE_RIGHT; - case PM_CONTEXT_PREDICATE: - return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON; - case PM_CONTEXT_NONE: - return false; - } +static const uint32_t context_terminators[] = { + [PM_CONTEXT_NONE] = 0, + [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE), + [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE), + [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE), + [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF), + [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF), + [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT), + [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END), + [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN), + [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN), + [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF), + [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF), + [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT), + [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON), + [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT), + [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF), + [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE), + [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF), + [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END), + [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END), +}; - return false; +static PRISM_INLINE bool +context_terminator(pm_context_t context, pm_token_t *token) { + return token->type < 32 && (context_terminators[context] & (1U << token->type)); } /** @@ -8691,7 +7949,7 @@ context_push(pm_parser_t *parser, pm_context_t context) { static void context_pop(pm_parser_t *parser) { pm_context_node_t *prev = parser->current_context->prev; - xfree(parser->current_context); + xfree_sized(parser->current_context, sizeof(pm_context_node_t)); parser->current_context = prev; } @@ -8753,6 +8011,7 @@ context_human(pm_context_t context) { case PM_CONTEXT_BEGIN: return "begin statement"; case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block"; case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block"; + case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter"; case PM_CONTEXT_CASE_WHEN: return "'when' clause"; case PM_CONTEXT_CASE_IN: return "'in' clause"; case PM_CONTEXT_CLASS: return "class definition"; @@ -8813,11 +8072,11 @@ context_human(pm_context_t context) { /* Specific token lexers */ /******************************************************************************/ -static inline void +static PRISM_INLINE void pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) { if (invalid != NULL) { pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER; - pm_parser_err(parser, invalid, invalid + 1, diag_id); + pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id); } } @@ -8928,7 +8187,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY); } - parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY; + parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY; break; // 0o1111 is an octal number @@ -8942,7 +8201,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL); } - parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL; break; // 01111 is an octal number @@ -8956,7 +8215,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { case '6': case '7': parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end); - parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL; break; // 0x1111 is a hexadecimal number @@ -8970,7 +8229,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL); } - parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL; break; // 0.xxx is a float @@ -8988,11 +8247,62 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { } } else { // If it didn't start with a 0, then we'll lex as far as we can into a - // decimal number. - parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end); + // decimal number. We compute the integer value inline to avoid + // re-scanning the digits later in pm_integer_parse. + { + const uint8_t *cursor = parser->current.end; + const uint8_t *end = parser->end; + uint64_t value = (uint64_t) (cursor[-1] - '0'); + + bool has_underscore = false; + bool prev_underscore = false; + const uint8_t *invalid = NULL; + + while (cursor < end) { + uint8_t c = *cursor; + if (c >= '0' && c <= '9') { + if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0'); + prev_underscore = false; + cursor++; + } else if (c == '_') { + has_underscore = true; + if (prev_underscore && invalid == NULL) invalid = cursor; + prev_underscore = true; + cursor++; + } else { + break; + } + } + + if (has_underscore) { + if (prev_underscore && invalid == NULL) invalid = cursor - 1; + pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid); + } + + if (value <= UINT32_MAX) { + parser->integer.value = (uint32_t) value; + parser->integer.lexed = true; + } + + parser->current.end = cursor; + } // Afterward, we'll lex as far as we can into an optional float suffix. - type = lex_optional_float_suffix(parser, seen_e); + // Guard the function call: the vast majority of decimal numbers are + // plain integers, so avoid the call when the next byte cannot start a + // float suffix. + { + uint8_t next = peek(parser); + if (next == '.' || next == 'e' || next == 'E') { + type = lex_optional_float_suffix(parser, seen_e); + + // If it turned out to be a float, the cached integer value is + // invalid. + if (type != PM_TOKEN_INTEGER) { + parser->integer.lexed = false; + } + } + } } // At this point we have a completed number, but we want to provide the user @@ -9002,7 +8312,7 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { const uint8_t *fraction_start = parser->current.end; const uint8_t *fraction_end = parser->current.end + 2; fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end); - pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION); + pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION); } return type; @@ -9011,7 +8321,8 @@ lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) { static pm_token_type_t lex_numeric(pm_parser_t *parser) { pm_token_type_t type = PM_TOKEN_INTEGER; - parser->integer_base = PM_INTEGER_BASE_FLAGS_DECIMAL; + parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL; + parser->integer.lexed = false; if (parser->current.end < parser->end) { bool seen_e = false; @@ -9101,8 +8412,8 @@ lex_global_variable(pm_parser_t *parser) { } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0); // $0 isn't allowed to be followed by anything. - pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id); + pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id); } return PM_TOKEN_GLOBAL_VARIABLE; @@ -9138,9 +8449,9 @@ lex_global_variable(pm_parser_t *parser) { } else { // If we get here, then we have a $ followed by something that // isn't recognized as a global variable. - pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; - const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start); + pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; + size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start); } return PM_TOKEN_GLOBAL_VARIABLE; @@ -9160,7 +8471,7 @@ lex_global_variable(pm_parser_t *parser) { * * `type` - the expected token type * * `modifier_type` - the expected modifier token type */ -static inline pm_token_type_t +static PRISM_INLINE pm_token_type_t lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) { if (memcmp(current_start, value, vlen) == 0) { pm_lex_state_t last_state = parser->lex_state; @@ -9199,6 +8510,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) { current_end += width; } } else { + // Fast path: scan ASCII identifier bytes using wide operations. + current_end += scan_identifier_ascii(current_end, end); + + // Byte-at-a-time fallback for the tail and any UTF-8 sequences. while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) { current_end += width; } @@ -9258,9 +8573,15 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) { switch (width) { case 2: if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) { + if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) { + return PM_TOKEN_KEYWORD_DO; + } if (pm_do_loop_stack_p(parser)) { return PM_TOKEN_KEYWORD_DO_LOOP; } + if (!pm_accepts_block_stack_p(parser)) { + return PM_TOKEN_KEYWORD_DO_BLOCK; + } return PM_TOKEN_KEYWORD_DO; } @@ -9339,8 +8660,8 @@ current_token_starts_line(pm_parser_t *parser) { * handle interpolation. This function performs that check. It returns a token * type representing what it found. Those cases are: * - * * PM_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The - * caller should keep lexing. + * * 0 - No interpolation was found at this point. The caller should keep + * lexing. * * PM_TOKEN_STRING_CONTENT - No interpolation was found at this point. The * caller should return this token type. * * PM_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller @@ -9357,9 +8678,9 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { return PM_TOKEN_STRING_CONTENT; } - // Now we'll check against the character that follows the #. If it constitutes - // valid interplation, we'll handle that, otherwise we'll return - // PM_TOKEN_NOT_PROVIDED. + // Now we'll check against the character that follows the #. If it + // constitutes valid interplation, we'll handle that, otherwise we'll return + // 0. switch (pound[1]) { case '@': { // In this case we may have hit an embedded instance or class variable. @@ -9393,7 +8714,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // string content. This is like if we get "#@-". In this case the caller // should keep lexing. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; } case '$': // In this case we may have hit an embedded global variable. If there's @@ -9443,7 +8764,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // In this case we've hit a #$ that does not indicate a global variable. // In this case we'll continue lexing past it. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; case '{': // In this case it's the start of an embedded expression. If we have // already consumed content, then we need to return that content as string @@ -9467,7 +8788,7 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) { // mark that by returning the not provided token type. This tells the // consumer to keep lexing forward. parser->current.end = pound + 1; - return PM_TOKEN_NOT_PROVIDED; + return 0; } } @@ -9491,7 +8812,7 @@ static const bool ascii_printable_chars[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; -static inline bool +static PRISM_INLINE bool char_is_ascii_printable(const uint8_t b) { return (b < 0x80) && ascii_printable_chars[b]; } @@ -9500,7 +8821,7 @@ char_is_ascii_printable(const uint8_t b) { * Return the value that a hexadecimal digit character represents. For example, * transform 'a' into 10, 'b' into 11, etc. */ -static inline uint8_t +static PRISM_INLINE uint8_t escape_hexadecimal_digit(const uint8_t value) { return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9); } @@ -9510,8 +8831,8 @@ escape_hexadecimal_digit(const uint8_t value) { * digits scanned. This function assumes that the characters have already been * validated. */ -static inline uint32_t -escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) { +static PRISM_INLINE uint32_t +escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) { uint32_t value = 0; for (size_t index = 0; index < length; index++) { if (index != 0) value <<= 4; @@ -9521,7 +8842,14 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) { // Here we're going to verify that the value is actually a valid Unicode // codepoint and not a surrogate pair. if (value >= 0xD800 && value <= 0xDFFF) { - pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE); + if (flags & PM_ESCAPE_FLAG_REGEXP) { + // In regexp context, defer the error to regexp encoding + // validation where we can produce a regexp-specific message. + } else if (error_location != NULL) { + pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE); + } else { + pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE); + } return 0xFFFD; } @@ -9531,7 +8859,7 @@ escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) { /** * Escape a single character value based on the given flags. */ -static inline uint8_t +static PRISM_INLINE uint8_t escape_byte(uint8_t value, const uint8_t flags) { if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f; if (flags & PM_ESCAPE_FLAG_META) value |= 0x80; @@ -9541,21 +8869,32 @@ escape_byte(uint8_t value, const uint8_t flags) { /** * Write a unicode codepoint to the given buffer. */ -static inline void +static PRISM_INLINE void escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) { // \u escape sequences in string-like structures implicitly change the // encoding to UTF-8 if they are >= 0x80 or if they are used in a character // literal. if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) { if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name); + if (flags & PM_ESCAPE_FLAG_REGEXP) { + // In regexp context, suppress this error — the regexp encoding + // validation will produce a more specific error message. + } else { + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name); + } } parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY; } if (!pm_buffer_append_unicode_codepoint(buffer, value)) { - pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE); + if (flags & PM_ESCAPE_FLAG_REGEXP) { + // In regexp context, defer the error to the regexp encoding + // validation which produces a regexp-specific message. + } else { + pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE); + } + pm_buffer_append_byte(buffer, 0xEF); pm_buffer_append_byte(buffer, 0xBF); pm_buffer_append_byte(buffer, 0xBD); @@ -9566,11 +8905,16 @@ escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t fla * When you're writing a byte to the unescape buffer, if the byte is non-ASCII * (i.e., the top bit is set) then it locks in the encoding. */ -static inline void -escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) { +static PRISM_INLINE void +escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) { if (byte >= 0x80) { if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name); + if (flags & PM_ESCAPE_FLAG_REGEXP) { + // In regexp context, suppress this error — the regexp encoding + // validation will produce a more specific error message. + } else { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name); + } } parser->explicit_encoding = parser->encoding; @@ -9594,19 +8938,19 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte * Note that in this case there is a literal \ byte in the regular expression * source so that the regular expression engine will perform its own unescaping. */ -static inline void +static PRISM_INLINE void escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) { if (flags & PM_ESCAPE_FLAG_REGEXP) { pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte); } - escape_write_byte_encoded(parser, buffer, byte); + escape_write_byte_encoded(parser, buffer, flags, byte); } /** * Write each byte of the given escaped character into the buffer. */ -static inline void +static PRISM_INLINE void escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) { size_t width; if (parser->encoding_changed) { @@ -9616,6 +8960,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_ } if (width == 1) { + if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags)); } else if (width > 1) { // Valid multibyte character. Just ignore escape. @@ -9641,7 +8986,7 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t PM_PARSER_WARN_TOKEN_FORMAT( parser, - parser->current, + &parser->current, PM_WARN_INVALID_CHARACTER, FLAG(flags), FLAG(flag), @@ -9756,7 +9101,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } } - escape_write_byte_encoded(parser, buffer, value); + escape_write_byte_encoded(parser, buffer, flags, value); } else { pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL); } @@ -9769,7 +9114,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (parser->current.end == parser->end) { const uint8_t *start = parser->current.end - 2; - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); } else if (peek(parser) == '{') { const uint8_t *unicode_codepoints_start = parser->current.end - 2; parser->current.end++; @@ -9798,18 +9143,19 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (hexadecimal_length > 6) { // \u{nnnn} character literal allows only 1-6 hexadecimal digits - pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG); + pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG); } else if (hexadecimal_length == 0) { // there are not hexadecimal characters if (flags & PM_ESCAPE_FLAG_REGEXP) { // If this is a regular expression, we are going to // let the regular expression engine handle this - // error instead of us. + // error instead of us because we don't know at this + // point if we're inside a comment in /x mode. pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { - pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE); - pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE); + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); } return; @@ -9821,7 +9167,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre extra_codepoints_start = unicode_start; } - uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length); + uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags); escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value); parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end); @@ -9830,21 +9176,22 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre // ?\u{nnnn} character literal should contain only one codepoint // and cannot be like ?\u{nnnn mmmm}. if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) { - pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL); + pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL); } if (parser->current.end == parser->end) { - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start); } else if (peek(parser) == '}') { parser->current.end++; } else { if (flags & PM_ESCAPE_FLAG_REGEXP) { // If this is a regular expression, we are going to let // the regular expression engine handle this error - // instead of us. + // instead of us because we don't know at this point if + // we're inside a comment in /x mode. pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { - pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM); + pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM); } } @@ -9859,10 +9206,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start)); } else { const uint8_t *start = parser->current.end - 2; - PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); + PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start); } } else if (length == 4) { - uint32_t value = escape_unicode(parser, parser->current.end, 4); + uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags); if (flags & PM_ESCAPE_FLAG_REGEXP) { pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start)); @@ -9908,7 +9255,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9930,6 +9277,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre return; } + if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); parser->current.end++; escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL)); return; @@ -9944,7 +9292,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (peek(parser) != '-') { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL); return; } @@ -9965,7 +9313,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -9984,10 +9332,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: { if (!char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL); return; } + if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); parser->current.end++; escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL)); return; @@ -10002,7 +9351,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre if (peek(parser) != '-') { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } @@ -10018,7 +9367,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre parser->current.end++; if (match(parser, 'u') || match(parser, 'U')) { - pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER); return; } @@ -10037,10 +9386,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: if (!char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } + if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); parser->current.end++; escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META)); return; @@ -10048,8 +9398,9 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre } case '\r': { if (peek_offset(parser, 1) == '\n') { + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2); parser->current.end += 2; - escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags)); + escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags)); return; } PRISM_FALLTHROUGH @@ -10057,7 +9408,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre default: { if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) { size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); - pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META); + pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META); return; } if (parser->current.end < parser->end) { @@ -10119,10 +9470,14 @@ lex_question_mark(pm_parser_t *parser) { lex_state_set(parser, PM_LEX_STATE_END); pm_buffer_t buffer; - pm_buffer_init_capacity(&buffer, 3); + pm_buffer_init(&buffer, 3); escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE); - pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length); + + // Copy buffer data into the arena and free the heap buffer. + void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t)); + pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length); + pm_buffer_cleanup(&buffer); return PM_TOKEN_CHARACTER_LITERAL; } else { @@ -10165,12 +9520,12 @@ lex_at_variable(pm_parser_t *parser) { } } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) { pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE; - if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) { + if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) { diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3; } size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end); - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start); } else { pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE; pm_parser_err_token(parser, &parser->current, diag_id); @@ -10188,24 +9543,23 @@ lex_at_variable(pm_parser_t *parser) { /** * Optionally call out to the lex callback if one is provided. */ -static inline void +static PRISM_INLINE void parser_lex_callback(pm_parser_t *parser) { - if (parser->lex_callback) { - parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current); + if (parser->lex_callback.callback) { + parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data); } } /** * Return a new comment node of the specified type. */ -static inline pm_comment_t * +static PRISM_INLINE pm_comment_t * parser_comment(pm_parser_t *parser, pm_comment_type_t type) { - pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t)); - if (comment == NULL) return NULL; + pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t)); *comment = (pm_comment_t) { .type = type, - .location = { parser->current.start, parser->current.end } + .location = TOK2LOC(parser, &parser->current) }; return comment; @@ -10224,7 +9578,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -10232,8 +9586,8 @@ lex_embdoc(pm_parser_t *parser) { parser_lex_callback(parser); // Now, create a comment that is going to be attached to the parser. + const uint8_t *comment_start = parser->current.start; pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC); - if (comment == NULL) return PM_TOKEN_EOF; // Now, loop until we find the end of the embedded documentation or the end // of the file. @@ -10257,14 +9611,14 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } parser->current.type = PM_TOKEN_EMBDOC_END; parser_lex_callback(parser); - comment->location.end = parser->current.end; + comment->location.length = (uint32_t) (parser->current.end - comment_start); pm_list_append(&parser->comment_list, (pm_list_node_t *) comment); return PM_TOKEN_EMBDOC_END; @@ -10277,7 +9631,7 @@ lex_embdoc(pm_parser_t *parser) { if (newline == NULL) { parser->current.end = parser->end; } else { - pm_newline_list_append(&parser->newline_list, newline); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); parser->current.end = newline + 1; } @@ -10287,7 +9641,7 @@ lex_embdoc(pm_parser_t *parser) { pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM); - comment->location.end = parser->current.end; + comment->location.length = (uint32_t) (parser->current.end - comment_start); pm_list_append(&parser->comment_list, (pm_list_node_t *) comment); return PM_TOKEN_EOF; @@ -10298,7 +9652,7 @@ lex_embdoc(pm_parser_t *parser) { * This happens in a couple places depending on whether or not we have already * lexed a comment. */ -static inline void +static PRISM_INLINE void parser_lex_ignored_newline(pm_parser_t *parser) { parser->current.type = PM_TOKEN_IGNORED_NEWLINE; parser_lex_callback(parser); @@ -10313,7 +9667,7 @@ parser_lex_ignored_newline(pm_parser_t *parser) { * If it is set, then we need to skip past the heredoc body and then clear the * heredoc_end field. */ -static inline void +static PRISM_INLINE void parser_flush_heredoc_end(pm_parser_t *parser) { assert(parser->heredoc_end <= parser->end); parser->next_start = parser->heredoc_end; @@ -10389,12 +9743,12 @@ typedef struct { /** * Push the given byte into the token buffer. */ -static inline void +static PRISM_INLINE void pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) { pm_buffer_append_byte(&token_buffer->buffer, byte); } -static inline void +static PRISM_INLINE void pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) { pm_buffer_append_byte(&token_buffer->regexp_buffer, byte); } @@ -10402,7 +9756,7 @@ pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t /** * Return the width of the character at the end of the current token. */ -static inline size_t +static PRISM_INLINE size_t parser_char_width(const pm_parser_t *parser) { size_t width; if (parser->encoding_changed) { @@ -10429,36 +9783,31 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse static void pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) { size_t width = parser_char_width(parser); - pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width); - pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width); + const uint8_t *start = parser->current.end; + pm_buffer_append_bytes(&token_buffer->base.buffer, start, width); + pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width); parser->current.end += width; } -static bool -pm_slice_ascii_only_p(const uint8_t *value, size_t length) { - for (size_t index = 0; index < length; index++) { - if (value[index] & 0x80) return false; - } - - return true; -} - /** * When we're about to return from lexing the current token and we know for sure * that we have found an escape sequence, this function is called to copy the * contents of the token buffer into the current string on the parser so that it * can be attached to the correct node. */ -static inline void +static PRISM_INLINE void pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { - pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer)); + // Copy buffer data into the arena and free the heap buffer. + size_t len = pm_buffer_length(&token_buffer->buffer); + void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t)); + pm_string_constant_init(&parser->current_string, (const char *) arena_data, len); + pm_buffer_cleanup(&token_buffer->buffer); } -static inline void +static PRISM_INLINE void pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { - pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer)); - parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer)); - pm_buffer_free(&token_buffer->regexp_buffer); + pm_token_buffer_copy(parser, &token_buffer->base); + pm_buffer_cleanup(&token_buffer->regexp_buffer); } /** @@ -10484,10 +9833,11 @@ static void pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { if (token_buffer->base.cursor == NULL) { pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end); - parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start)); } else { - pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor)); - pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor)); + const uint8_t *cursor = token_buffer->base.cursor; + size_t length = (size_t) (parser->current.end - cursor); + pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length); + pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length); pm_regexp_token_buffer_copy(parser, token_buffer); } } @@ -10506,7 +9856,7 @@ static void pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { const uint8_t *start; if (token_buffer->cursor == NULL) { - pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); + pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); start = parser->current.start; } else { start = token_buffer->cursor; @@ -10523,8 +9873,8 @@ static void pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) { const uint8_t *start; if (token_buffer->base.cursor == NULL) { - pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); - pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); + pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); + pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE); start = parser->current.start; } else { start = token_buffer->base.cursor; @@ -10543,7 +9893,7 @@ pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *tok * Effectively the same thing as pm_strspn_inline_whitespace, but in the case of * a tilde heredoc expands out tab characters to the nearest tab boundaries. */ -static inline size_t +static PRISM_INLINE size_t pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) { size_t whitespace = 0; @@ -10591,7 +9941,7 @@ pm_lex_percent_delimiter(pm_parser_t *parser) { parser_flush_heredoc_end(parser); } else { // Otherwise, we'll add the newline to the list of newlines. - pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length)); } uint8_t delimiter = *parser->current.end; @@ -10639,6 +9989,12 @@ parser_lex(pm_parser_t *parser) { unsigned int semantic_token_seen = parser->semantic_token_seen; parser->semantic_token_seen = true; + // We'll jump to this label when we are about to encounter an EOF. + // If we still have lex_modes on the stack, we pop them so that cleanup + // can happen. For example, we should still continue parsing after a heredoc + // identifier, even if the heredoc body was syntax invalid. + switch_lex_modes: + switch (parser->lex_modes.current->mode) { case PM_LEX_DEFAULT: case PM_LEX_EMBEXPR: @@ -10661,22 +10017,29 @@ parser_lex(pm_parser_t *parser) { bool space_seen = false; // First, we're going to skip past any whitespace at the front of the next - // token. + // token. Skip runs of inline whitespace in bulk to avoid per-character + // stores back to parser->current.end. bool chomping = true; while (parser->current.end < parser->end && chomping) { - switch (*parser->current.end) { - case ' ': - case '\t': - case '\f': - case '\v': - parser->current.end++; + { + static const uint8_t inline_whitespace[256] = { + [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1 + }; + const uint8_t *scan = parser->current.end; + while (scan < parser->end && inline_whitespace[*scan]) scan++; + if (scan > parser->current.end) { + parser->current.end = scan; space_seen = true; - break; + continue; + } + } + + switch (*parser->current.end) { case '\r': if (match_eol_offset(parser, 1)) { chomping = false; } else { - pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); + pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN); parser->current.end++; space_seen = true; } @@ -10689,7 +10052,7 @@ parser_lex(pm_parser_t *parser) { parser->heredoc_end = NULL; } else { parser->current.end += eol_length + 1; - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); space_seen = true; } } else if (pm_char_is_inline_whitespace(*parser->current.end)) { @@ -10712,6 +10075,14 @@ parser_lex(pm_parser_t *parser) { // We'll check if we're at the end of the file. If we are, then we // need to return the EOF token. if (parser->current.end >= parser->end) { + // We may be missing closing tokens. We should pop modes one by one + // to do the appropriate cleanup like moving next_start for heredocs. + // Only when no mode is remaining will we actually emit the EOF token. + if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) { + lex_mode_pop(parser); + goto switch_lex_modes; + } + // If we hit EOF, but the EOF came immediately after a newline, // set the start of the token to the newline. This way any EOF // errors will be reported as happening on that line rather than @@ -10783,7 +10154,7 @@ parser_lex(pm_parser_t *parser) { } if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } } @@ -10841,14 +10212,50 @@ parser_lex(pm_parser_t *parser) { following = next_newline(following, parser->end - following); } - // If the lex state was ignored, or we hit a '.' or a '&.', - // we will lex the ignored newline + // If the lex state was ignored, we will lex the + // ignored newline. + if (lex_state_ignored_p(parser)) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lexed_comment = false; + goto lex_next_token; + } + + // If we hit a '.' or a '&.' we will lex the ignored + // newline. + if (following && ( + (peek_at(parser, following) == '.') || + (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.') + )) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lexed_comment = false; + goto lex_next_token; + } + + + // If we are parsing as CRuby 4.0 or later and we + // hit a '&&' or a '||' then we will lex the ignored + // newline. if ( - lex_state_ignored_p(parser) || - (following && ( - (peek_at(parser, following) == '.') || - (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.') - )) + (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) && + following && ( + (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') || + (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') || + ( + peek_at(parser, following) == 'a' && + peek_at(parser, following + 1) == 'n' && + peek_at(parser, following + 2) == 'd' && + peek_at(parser, next_content + 3) != '!' && + peek_at(parser, next_content + 3) != '?' && + !char_is_identifier(parser, following + 3, parser->end - (following + 3)) + ) || + ( + peek_at(parser, following) == 'o' && + peek_at(parser, following + 1) == 'r' && + peek_at(parser, next_content + 2) != '!' && + peek_at(parser, next_content + 2) != '?' && + !char_is_identifier(parser, following + 2, parser->end - (following + 2)) + ) + ) ) { if (!lexed_comment) parser_lex_ignored_newline(parser); lexed_comment = false; @@ -10888,6 +10295,67 @@ parser_lex(pm_parser_t *parser) { parser->next_start = NULL; LEX(PM_TOKEN_AMPERSAND_DOT); } + + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) { + // If we hit an && then we are in a logical chain + // and we need to return the logical operator. + if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + LEX(PM_TOKEN_AMPERSAND_AMPERSAND); + } + + // If we hit a || then we are in a logical chain and + // we need to return the logical operator. + if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + LEX(PM_TOKEN_PIPE_PIPE); + } + + // If we hit an 'and' then we are in a logical chain + // and we need to return the logical operator. + if ( + peek_at(parser, next_content) == 'a' && + peek_at(parser, next_content + 1) == 'n' && + peek_at(parser, next_content + 2) == 'd' && + peek_at(parser, next_content + 3) != '!' && + peek_at(parser, next_content + 3) != '?' && + !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3)) + ) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 3; + parser->next_start = NULL; + parser->command_start = true; + LEX(PM_TOKEN_KEYWORD_AND); + } + + // If we hit a 'or' then we are in a logical chain + // and we need to return the logical operator. + if ( + peek_at(parser, next_content) == 'o' && + peek_at(parser, next_content + 1) == 'r' && + peek_at(parser, next_content + 2) != '!' && + peek_at(parser, next_content + 2) != '?' && + !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2)) + ) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + parser->command_start = true; + LEX(PM_TOKEN_KEYWORD_OR); + } + } } // At this point we know this is a regular newline, and we can set the @@ -10902,7 +10370,7 @@ parser_lex(pm_parser_t *parser) { // , case ',': if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type)); } lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); @@ -11028,7 +10496,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_USTAR_STAR; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -11053,7 +10521,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_USTAR; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -11179,7 +10647,7 @@ parser_lex(pm_parser_t *parser) { bool ident_error = false; if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) { - pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER); + pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER); ident_error = true; } @@ -11212,7 +10680,7 @@ parser_lex(pm_parser_t *parser) { } else { // Otherwise, we want to indicate that the body of the // heredoc starts on the character after the next newline. - pm_newline_list_append(&parser->newline_list, body_start); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1)); body_start++; } @@ -11231,7 +10699,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document"); } if (lex_state_operator_p(parser)) { @@ -11357,7 +10825,7 @@ parser_lex(pm_parser_t *parser) { } else if (lex_state_beg_p(parser)) { type = PM_TOKEN_UAMPERSAND; } else if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix"); } if (lex_state_operator_p(parser)) { @@ -11433,7 +10901,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator"); } lex_state_set(parser, PM_LEX_STATE_BEG); @@ -11474,7 +10942,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator"); } lex_state_set(parser, PM_LEX_STATE_BEG); @@ -11573,7 +11041,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal"); } if (lex_state_operator_p(parser)) { @@ -11758,7 +11226,7 @@ parser_lex(pm_parser_t *parser) { } if (ambiguous_operator_p(parser, space_seen)) { - PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal"); + PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal"); } lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG); @@ -11794,40 +11262,40 @@ parser_lex(pm_parser_t *parser) { // token after adding an appropriate error message. if (!width) { if (*parser->current.start >= 0x80) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start); } else if (*parser->current.start == '\\') { switch (peek_at(parser, parser->current.start + 1)) { case ' ': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space"); break; case '\f': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed"); break; case '\t': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab"); break; case '\v': parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab"); break; case '\r': if (peek_at(parser, parser->current.start + 2) != '\n') { parser->current.end++; - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return"); break; } PRISM_FALLTHROUGH default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash"); break; } } else if (char_is_ascii_printable(*parser->current.start)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start); } goto lex_next_token; @@ -11853,15 +11321,15 @@ parser_lex(pm_parser_t *parser) { // correct column information for it. const uint8_t *cursor = parser->current.end; while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) { - pm_newline_list_append(&parser->newline_list, cursor++); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start)); } parser->current.end = parser->end; parser->current.type = PM_TOKEN___END__; parser_lex_callback(parser); - parser->data_loc.start = parser->current.start; - parser->data_loc.end = parser->current.end; + parser->data_loc.start = PM_TOKEN_START(parser, &parser->current); + parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current); LEX(PM_TOKEN_EOF); } @@ -11886,7 +11354,7 @@ parser_lex(pm_parser_t *parser) { !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) && (type == PM_TOKEN_IDENTIFIER) && ((pm_parser_local_depth(parser, &parser->current) != -1) || - pm_token_is_numbered_parameter(parser->current.start, parser->current.end)) + pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))) ) { lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL); } @@ -11914,7 +11382,7 @@ parser_lex(pm_parser_t *parser) { whitespace += 1; } } else { - whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list); + whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); } if (whitespace > 0) { @@ -12029,7 +11497,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -12057,7 +11525,7 @@ parser_lex(pm_parser_t *parser) { if (*breakpoint == '#') { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had something // that looked like an interpolated class or instance variable // like "#@" but wasn't actually. In this case we'll just skip @@ -12162,7 +11630,13 @@ parser_lex(pm_parser_t *parser) { size_t eol_length = match_eol_at(parser, breakpoint); if (eol_length) { parser->current.end = breakpoint + eol_length; - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + + // Track the newline if we're not in a heredoc that + // would have already have added the newline to the + // list. + if (parser->heredoc_end == NULL) { + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + } } else { parser->current.end = breakpoint + 1; } @@ -12208,7 +11682,7 @@ parser_lex(pm_parser_t *parser) { // If we've hit a newline, then we need to track that in // the list of newlines. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); break; @@ -12256,7 +11730,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -12303,7 +11777,7 @@ parser_lex(pm_parser_t *parser) { // interpolation. pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had // something that looked like an interpolated class or // instance variable like "#@" but wasn't actually. In @@ -12416,7 +11890,13 @@ parser_lex(pm_parser_t *parser) { size_t eol_length = match_eol_at(parser, breakpoint); if (eol_length) { parser->current.end = breakpoint + eol_length; - pm_newline_list_append(&parser->newline_list, parser->current.end - 1); + + // Track the newline if we're not in a heredoc that + // would have already have added the newline to the + // list. + if (parser->heredoc_end == NULL) { + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current)); + } } else { parser->current.end = breakpoint + 1; } @@ -12428,6 +11908,13 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_LABEL_END); } + // When the delimiter itself is a newline, we won't + // get a chance to flush heredocs in the usual places since + // the newline is already consumed. + if (term == '\n' && parser->heredoc_end) { + parser_flush_heredoc_end(parser); + } + lex_state_set(parser, PM_LEX_STATE_END); lex_mode_pop(parser); LEX(PM_TOKEN_STRING_END); @@ -12460,7 +11947,7 @@ parser_lex(pm_parser_t *parser) { // for the terminator in case the terminator is a // newline character. if (parser->heredoc_end == NULL) { - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1)); parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true); break; @@ -12514,7 +12001,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } else { // ... else track the newline. - pm_newline_list_append(&parser->newline_list, parser->current.end); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1); } parser->current.end++; @@ -12543,7 +12030,7 @@ parser_lex(pm_parser_t *parser) { case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had something that // looked like an interpolated class or instance variable like "#@" // but wasn't actually. In this case we'll just skip to the next @@ -12643,7 +12130,7 @@ parser_lex(pm_parser_t *parser) { (memcmp(terminator_start, ident_start, ident_length) == 0) ) { if (newline != NULL) { - pm_newline_list_append(&parser->newline_list, newline); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); } parser->current.end = terminator_end; @@ -12674,7 +12161,7 @@ parser_lex(pm_parser_t *parser) { // Otherwise we'll be parsing string content. These are the places // where we need to split up the content of the heredoc. We'll use // strpbrk to find the first of these characters. - uint8_t breakpoints[] = "\r\n\\#"; + uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#"; pm_heredoc_quote_t quote = heredoc_lex_mode->quote; if (quote == PM_HEREDOC_QUOTE_SINGLE) { @@ -12715,7 +12202,7 @@ parser_lex(pm_parser_t *parser) { LEX(PM_TOKEN_STRING_CONTENT); } - pm_newline_list_append(&parser->newline_list, breakpoint); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1)); // If we have a - or ~ heredoc, then we can match after // some leading whitespace. @@ -12833,7 +12320,10 @@ parser_lex(pm_parser_t *parser) { // string content. if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) { const uint8_t *end = parser->current.end; - pm_newline_list_append(&parser->newline_list, end); + + if (parser->heredoc_end == NULL) { + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1)); + } // Here we want the buffer to only // include up to the backslash. @@ -12864,7 +12354,7 @@ parser_lex(pm_parser_t *parser) { case '#': { pm_token_type_t type = lex_interpolation(parser, breakpoint); - if (type == PM_TOKEN_NOT_PROVIDED) { + if (!type) { // If we haven't returned at this point then we had // something that looked like an interpolated class // or instance variable like "#@" but wasn't @@ -13089,7 +12579,7 @@ pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = { /** * Returns true if the current token is of the given type. */ -static inline bool +static PRISM_INLINE bool match1(const pm_parser_t *parser, pm_token_type_t type) { return parser->current.type == type; } @@ -13097,7 +12587,7 @@ match1(const pm_parser_t *parser, pm_token_type_t type) { /** * Returns true if the current token is of either of the given types. */ -static inline bool +static PRISM_INLINE bool match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) { return match1(parser, type1) || match1(parser, type2); } @@ -13105,7 +12595,7 @@ match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) /** * Returns true if the current token is any of the three given types. */ -static inline bool +static PRISM_INLINE bool match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3); } @@ -13113,15 +12603,23 @@ match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, /** * Returns true if the current token is any of the four given types. */ -static inline bool +static PRISM_INLINE bool match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4); } /** + * Returns true if the current token is any of the six given types. + */ +static PRISM_INLINE bool +match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) { + return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6); +} + +/** * Returns true if the current token is any of the seven given types. */ -static inline bool +static PRISM_INLINE bool match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7); } @@ -13129,20 +12627,12 @@ match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, /** * Returns true if the current token is any of the eight given types. */ -static inline bool +static PRISM_INLINE bool match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) { return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8); } /** - * Returns true if the current token is any of the nine given types. - */ -static inline bool -match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) { - return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9); -} - -/** * If the current token is of the specified type, lex forward by one token and * return true. Otherwise, return false. For example: * @@ -13161,7 +12651,7 @@ accept1(pm_parser_t *parser, pm_token_type_t type) { * If the current token is either of the two given types, lex forward by one * token and return true. Otherwise return false. */ -static inline bool +static PRISM_INLINE bool accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) { if (match2(parser, type1, type2)) { parser_lex(parser); @@ -13186,10 +12676,10 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) { if (accept1(parser, type)) return; const uint8_t *location = parser->previous.end; - pm_parser_err(parser, location, location, diag_id); + pm_parser_err(parser, U32(location - parser->start), 0, diag_id); parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } /** @@ -13201,10 +12691,10 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di if (accept2(parser, type1, type2)) return; const uint8_t *location = parser->previous.end; - pm_parser_err(parser, location, location, diag_id); + pm_parser_err(parser, U32(location - parser->start), 0, diag_id); parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } /** @@ -13218,20 +12708,43 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide } else { pm_parser_err_heredoc_term(parser, ident_start, ident_length); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } +/** + * A special expect1 that attaches the error to the opening token location + * rather than the current position. This is useful for errors about missing + * closing tokens, where we want to point to the line with the opening token + * (e.g., `def`, `class`, `if`, `{`) rather than the end of the file. + */ +static void +expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) { + if (accept1(parser, type)) return; + + const uint8_t *start = opening->start; + pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id); + + parser->previous.start = parser->previous.end; + parser->previous.type = 0; +} + +/** Flags for controlling expression parsing behavior. */ +#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) 0x1) +#define PM_PARSE_ACCEPTS_LABEL ((uint8_t) 0x2) +#define PM_PARSE_ACCEPTS_DO_BLOCK ((uint8_t) 0x4) +#define PM_PARSE_IN_ENDLESS_DEF ((uint8_t) 0x8) + static pm_node_t * -parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth); +parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth); /** * This is a wrapper of parse_expression, which also checks whether the * resulting node is a value expression. */ static pm_node_t * -parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) { - pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth); +parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { + pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth); pm_assert_value_expression(parser, node); return node; } @@ -13254,7 +12767,7 @@ parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bo * work in all cases, it may need to be refactored later. But it appears to work * for now. */ -static inline bool +static PRISM_INLINE bool token_begins_expression_p(pm_token_type_t type) { switch (type) { case PM_TOKEN_EQUAL_GREATER: @@ -13270,6 +12783,7 @@ token_begins_expression_p(pm_token_type_t type) { case PM_TOKEN_EOF: case PM_TOKEN_LAMBDA_BEGIN: case PM_TOKEN_KEYWORD_DO: + case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_ELSE: @@ -13315,14 +12829,89 @@ token_begins_expression_p(pm_token_type_t type) { * prefixed by the * operator. */ static pm_node_t * -parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) { +parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { if (accept1(parser, PM_TOKEN_USTAR)) { pm_token_t operator = parser->previous; - pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_splat_node_create(parser, &operator, expression); + pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + return UP(pm_splat_node_create(parser, &operator, expression)); } - return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth); + return parse_value_expression(parser, binding_power, flags, diag_id, depth); +} + +static bool +pm_node_unreference_each(const pm_node_t *node, void *data) { + switch (PM_NODE_TYPE(node)) { + /* When we are about to destroy a set of nodes that could potentially + * contain block exits for the current scope, we need to check if they + * are contained in the list of block exits and remove them if they are. + */ + case PM_BREAK_NODE: + case PM_NEXT_NODE: + case PM_REDO_NODE: { + pm_parser_t *parser = (pm_parser_t *) data; + size_t index = 0; + + while (index < parser->current_block_exits->size) { + pm_node_t *block_exit = parser->current_block_exits->nodes[index]; + + if (block_exit == node) { + if (index + 1 < parser->current_block_exits->size) { + memmove( + &parser->current_block_exits->nodes[index], + &parser->current_block_exits->nodes[index + 1], + (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *) + ); + } + parser->current_block_exits->size--; + + /* Note returning true here because these nodes could have + * arguments that are themselves block exits. */ + return true; + } + + index++; + } + + return true; + } + /* When an implicit local variable is written to or targeted, it becomes + * a regular, named local variable. This branch removes it from the list + * of implicit parameters when that happens. */ + case PM_LOCAL_VARIABLE_READ_NODE: + case PM_IT_LOCAL_VARIABLE_READ_NODE: { + pm_parser_t *parser = (pm_parser_t *) data; + pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters; + + for (size_t index = 0; index < implicit_parameters->size; index++) { + if (implicit_parameters->nodes[index] == node) { + /* If the node is not the last one in the list, we need to + * shift the remaining nodes down to fill the gap. This is + * extremely unlikely to happen. */ + if (index != implicit_parameters->size - 1) { + memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *)); + } + + implicit_parameters->size--; + break; + } + } + + return false; + } + default: + return true; + } +} + +/** + * When we are about to destroy a set of nodes that could potentially be + * referenced by one or more lists on the parser, then remove them from those + * lists so we don't get a use-after-free. + */ +static void +pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) { + pm_visit_node(node, pm_node_unreference_each, parser); } /** @@ -13337,16 +12926,12 @@ parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) { // append an =. pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field); size_t length = constant->length; - uint8_t *name = xcalloc(length + 1, sizeof(uint8_t)); - if (name == NULL) return; + uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1); memcpy(name, constant->start, length); name[length] = '='; - // Now switch the name to the new string. - // This silences clang analyzer warning about leak of memory pointed by `name`. - // NOLINTNEXTLINE(clang-analyzer-*) - *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1); + *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1); } /** @@ -13368,35 +12953,10 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) { default: break; } - pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end); + pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target)); pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0); - pm_node_destroy(parser, target); - return (pm_node_t *) result; -} - -/** - * When an implicit local variable is written to or targeted, it becomes a - * regular, named local variable. This function removes it from the list of - * implicit parameters when that happens. - */ -static void -parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) { - pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters; - - for (size_t index = 0; index < implicit_parameters->size; index++) { - if (implicit_parameters->nodes[index] == node) { - // If the node is not the last one in the list, we need to shift the - // remaining nodes down to fill the gap. This is extremely unlikely - // to happen. - if (index != implicit_parameters->size - 1) { - memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *)); - } - - implicit_parameters->size--; - break; - } - } + return UP(result); } /** @@ -13410,7 +12970,7 @@ parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) { static pm_node_t * parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) { switch (PM_NODE_TYPE(target)) { - case PM_MISSING_NODE: + case PM_ERROR_RECOVERY_NODE: return target; case PM_SOURCE_ENCODING_NODE: case PM_FALSE_NODE: @@ -13448,15 +13008,15 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p case PM_BACK_REFERENCE_READ_NODE: case PM_NUMBERED_REFERENCE_READ_NODE: PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY); - return target; + return UP(pm_error_recovery_node_create_unexpected(parser, target)); case PM_GLOBAL_VARIABLE_READ_NODE: assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t)); target->type = PM_GLOBAL_VARIABLE_TARGET_NODE; return target; case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { - PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start); - parse_target_implicit_parameter(parser, target); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target)); + pm_node_unreference(parser, target); } const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target; @@ -13471,10 +13031,9 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); - pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0); + pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0)); - parse_target_implicit_parameter(parser, target); - pm_node_destroy(parser, target); + pm_node_unreference(parser, target); return node; } @@ -13497,7 +13056,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p splat->expression = parse_target(parser, splat->expression, multiple, true); } - return (pm_node_t *) splat; + return UP(splat); } case PM_CALL_NODE: { pm_call_node_t *call = (pm_call_node_t *) target; @@ -13506,10 +13065,10 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // target then this is either a method call or a local variable // write. if ( - (call->message_loc.start != NULL) && - (call->message_loc.end[-1] != '!') && - (call->message_loc.end[-1] != '?') && - (call->opening_loc.start == NULL) && + (call->message_loc.length > 0) && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') && + (call->opening_loc.length == 0) && (call->arguments == NULL) && (call->block == NULL) ) { @@ -13523,21 +13082,19 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // When it was parsed in the prefix position, foo was seen as a // method call with no receiver and no arguments. Now we have an // =, so we know it's a local variable write. - const pm_location_t message_loc = call->message_loc; - - pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0); - pm_node_destroy(parser, target); + pm_location_t message_loc = call->message_loc; + pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0); - return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0); + return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0)); } - if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) { + if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) { if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) { pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION); } parse_write_name(parser, &call->name); - return (pm_node_t *) pm_call_target_node_create(parser, call); + return UP(pm_call_target_node_create(parser, call)); } } @@ -13545,7 +13102,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p // an aref expression, and we can transform it into an aset // expression. if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) { - return (pm_node_t *) pm_index_target_node_create(parser, call); + return UP(pm_index_target_node_create(parser, call)); } } PRISM_FALLTHROUGH @@ -13588,7 +13145,7 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) { pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser); if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) { - return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant); + return UP(pm_shareable_constant_node_create(parser, write, shareable_constant)); } return write; @@ -13600,16 +13157,14 @@ parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) { static pm_node_t * parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) { switch (PM_NODE_TYPE(target)) { - case PM_MISSING_NODE: - pm_node_destroy(parser, value); + case PM_ERROR_RECOVERY_NODE: return target; case PM_CLASS_VARIABLE_READ_NODE: { pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value); - pm_node_destroy(parser, target); - return (pm_node_t *) node; + return UP(node); } case PM_CONSTANT_PATH_NODE: { - pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value); + pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value)); if (context_def_p(parser)) { pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD); @@ -13618,13 +13173,12 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod return parse_shareable_constant_write(parser, node); } case PM_CONSTANT_READ_NODE: { - pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value); + pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value)); if (context_def_p(parser)) { pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD); } - pm_node_destroy(parser, target); return parse_shareable_constant_write(parser, node); } case PM_BACK_REFERENCE_READ_NODE: @@ -13633,45 +13187,40 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod PRISM_FALLTHROUGH case PM_GLOBAL_VARIABLE_READ_NODE: { pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value); - pm_node_destroy(parser, target); - return (pm_node_t *) node; + return UP(node); } case PM_LOCAL_VARIABLE_READ_NODE: { pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target; + pm_location_t location = target->location; pm_constant_id_t name = local_read->name; - pm_location_t name_loc = target->location; - uint32_t depth = local_read->depth; pm_scope_t *scope = pm_parser_scope_find(parser, depth); - if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) { + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) { pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED; - PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start); - parse_target_implicit_parameter(parser, target); + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target)); + pm_node_unreference(parser, target); } pm_locals_unread(&scope->locals, name); - pm_node_destroy(parser, target); - return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator); + return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator)); } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); - pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator); + pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator)); - parse_target_implicit_parameter(parser, target); - pm_node_destroy(parser, target); + pm_node_unreference(parser, target); return node; } case PM_INSTANCE_VARIABLE_READ_NODE: { - pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value); - pm_node_destroy(parser, target); + pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value)); return write_node; } case PM_MULTI_TARGET_NODE: - return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value); + return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value)); case PM_SPLAT_NODE: { pm_splat_node_t *splat = (pm_splat_node_t *) target; @@ -13680,9 +13229,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod } pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser); - pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat); + pm_multi_target_node_targets_append(parser, multi_target, UP(splat)); - return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value); + return UP(pm_multi_write_node_create(parser, multi_target, operator, value)); } case PM_CALL_NODE: { pm_call_node_t *call = (pm_call_node_t *) target; @@ -13691,10 +13240,10 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // target then this is either a method call or a local variable // write. if ( - (call->message_loc.start != NULL) && - (call->message_loc.end[-1] != '!') && - (call->message_loc.end[-1] != '?') && - (call->opening_loc.start == NULL) && + (call->message_loc.length > 0) && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') && + (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') && + (call->opening_loc.length == 0) && (call->arguments == NULL) && (call->block == NULL) ) { @@ -13708,19 +13257,18 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // When it was parsed in the prefix position, foo was seen as a // method call with no receiver and no arguments. Now we have an // =, so we know it's a local variable write. - const pm_location_t message = call->message_loc; + pm_location_t message_loc = call->message_loc; - pm_parser_local_add_location(parser, message.start, message.end, 0); - pm_node_destroy(parser, target); + pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length); + pm_parser_local_add_location(parser, &message_loc, 0); - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end); - target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator); + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc)); + target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator)); - pm_refute_numbered_parameter(parser, message.start, message.end); return target; } - if (char_is_identifier_start(parser, call->message_loc.start, parser->end - call->message_loc.start)) { + if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) { // When we get here, we have a method call, because it was // previously marked as a method call but now we have an =. This // looks like: @@ -13734,13 +13282,14 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod pm_arguments_node_t *arguments = pm_arguments_node_create(parser); call->arguments = arguments; - pm_arguments_node_arguments_append(arguments, value); - call->base.location.end = arguments->base.location.end; + pm_arguments_node_arguments_append(parser->arena, arguments, value); + PM_NODE_LENGTH_SET_NODE(call, arguments); + call->equal_loc = TOK2LOC(parser, operator); parse_write_name(parser, &call->name); - pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)); + pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)); - return (pm_node_t *) call; + return UP(call); } } @@ -13752,25 +13301,31 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod call->arguments = pm_arguments_node_create(parser); } - pm_arguments_node_arguments_append(call->arguments, value); - target->location.end = value->location.end; + pm_arguments_node_arguments_append(parser->arena, call->arguments, value); + PM_NODE_LENGTH_SET_NODE(target, value); // Replace the name with "[]=". call->name = pm_parser_constant_id_constant(parser, "[]=", 3); + call->equal_loc = TOK2LOC(parser, operator); // Ensure that the arguments for []= don't contain keywords pm_index_arguments_check(parser, call->arguments, call->block); - pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)); + pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)); return target; } - // If there are arguments on the call node, then it can't be a method - // call ending with = or a local variable write, so it must be a - // syntax error. In this case we'll fall through to our default + // If there are arguments on the call node, then it can't be a + // method call ending with = or a local variable write, so it must + // be a syntax error. In this case we'll fall through to our default // handling. We need to free the value that we parsed because there // is no way for us to attach it to the tree at this point. - pm_node_destroy(parser, value); + // + // Since it is possible for the value to contain an implicit + // parameter somewhere in its subtree, we need to walk it and remove + // any implicit parameters from the list of implicit parameters for + // the current scope. + pm_node_unreference(parser, value); } PRISM_FALLTHROUGH default: @@ -13801,11 +13356,10 @@ parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t default: break; } - pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1); + pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1); pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals); - pm_node_destroy(parser, target); - return (pm_node_t *) result; + return UP(result); } /** @@ -13838,35 +13392,35 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b pm_node_t *name = NULL; if (token_begins_expression_p(parser->current.type)) { - name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); name = parse_target(parser, name, true, true); } - pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name); + pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name)); pm_multi_target_node_targets_append(parser, result, splat); has_rest = true; } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { context_push(parser, PM_CONTEXT_MULTI_TARGET); - pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); + pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); target = parse_target(parser, target, true, false); pm_multi_target_node_targets_append(parser, result, target); context_pop(parser); } else if (token_begins_expression_p(parser->current.type)) { - pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); + pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); target = parse_target(parser, target, true, false); pm_multi_target_node_targets_append(parser, result, target); } else if (!match1(parser, PM_TOKEN_EOF)) { // If we get here, then we have a trailing , in a multi target node. // We'll add an implicit rest node to represent this. - pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous); + pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous)); pm_multi_target_node_targets_append(parser, result, rest); break; } } - return (pm_node_t *) result; + return UP(result); } /** @@ -13876,7 +13430,13 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b static pm_node_t * parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) { pm_node_t *result = parse_targets(parser, first_target, binding_power, depth); - accept1(parser, PM_TOKEN_NEWLINE); + + // If we're inside parentheses, then we allow a newline before the + // closing parenthesis or equals sign. Outside of parentheses, a newline + // is not allowed (e.g., `a, b\n= 1, 2` is not valid). + if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) { + accept1(parser, PM_TOKEN_NEWLINE); + } // Ensure that we have either an = or a ) after the targets. if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) { @@ -13905,7 +13465,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { context_push(parser, context); while (true) { - pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); + pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); pm_statements_node_body_append(parser, statements, node, true); // If we're recovering from a syntax error, then we need to stop parsing @@ -13945,7 +13505,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { // we were unable to parse an expression, then we will skip past this // token and continue parsing the statements list. Otherwise we'll add // an error and continue parsing the statements list. - if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) { + if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) { parser_lex(parser); // If we are at the end of the file, then we need to stop parsing @@ -13963,13 +13523,14 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { // This is an inlined version of accept1 because the error that we // want to add has varargs. If this happens again, we should // probably extract a helper function. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } context_pop(parser); + bool last_value = true; switch (context) { case PM_CONTEXT_BEGIN_ENSURE: @@ -13990,23 +13551,24 @@ parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) { */ static void pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { - const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true); + const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true); if (duplicated != NULL) { pm_buffer_t buffer = { 0 }; - pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated); + pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated); pm_diagnostic_list_append_format( + &parser->metadata_arena, &parser->warning_list, duplicated->location.start, - duplicated->location.end, + duplicated->location.length, PM_WARN_DUPLICATED_HASH_KEY, (int) pm_buffer_length(&buffer), pm_buffer_value(&buffer), - pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line + pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line ); - pm_buffer_free(&buffer); + pm_buffer_cleanup(&buffer); } } @@ -14018,14 +13580,15 @@ static void pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) { pm_node_t *previous; - if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) { + if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) { pm_diagnostic_list_append_format( + &parser->metadata_arena, &parser->warning_list, - node->location.start, - node->location.end, + PM_NODE_START(node), + PM_NODE_LENGTH(node), PM_WARN_DUPLICATED_WHEN_CLAUSE, - pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line, - pm_newline_list_line_column(&parser->newline_list, previous->location.start, parser->start_line).line + pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line, + pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line ); } } @@ -14053,14 +13616,14 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod // inner hash to share the static literals with the outer // hash. parser->current_hash_keys = literals; - value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1)); + value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1)); } else if (token_begins_expression_p(parser->current.type)) { - value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1)); + value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1)); } else { pm_parser_scope_forwarding_keywords_check(parser, &operator); } - element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator); + element = UP(pm_assoc_splat_node_create(parser, value, &operator)); contains_keyword_splat = true; break; } @@ -14068,44 +13631,43 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_token_t label = parser->current; parser_lex(parser); - pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label); + pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label)); pm_hash_key_static_literals_add(parser, literals, key); - pm_token_t operator = not_provided(parser); pm_node_t *value = NULL; if (token_begins_expression_p(parser->current.type)) { - value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1)); + value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1)); } else { if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) { pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 }; - value = (pm_node_t *) pm_constant_read_node_create(parser, &constant); + value = UP(pm_constant_read_node_create(parser, &constant)); } else { int depth = -1; pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }; if (identifier.end[-1] == '!' || identifier.end[-1] == '?') { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ); } else { depth = pm_parser_local_depth(parser, &identifier); } if (depth == -1) { - value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier); + value = UP(pm_call_node_variable_call_create(parser, &identifier)); } else { - value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth); + value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth)); } } - value->location.end++; - value = (pm_node_t *) pm_implicit_node_create(parser, value); + value->location.length++; + value = UP(pm_implicit_node_create(parser, value)); } - element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value); + element = UP(pm_assoc_node_create(parser, key, NULL, value)); break; } default: { - pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1)); + pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1)); // Hash keys that are strings are automatically frozen. We will // mark that here. @@ -14115,24 +13677,22 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod pm_hash_key_static_literals_add(parser, literals, key); - pm_token_t operator; - if (pm_symbol_node_label_p(key)) { - operator = not_provided(parser); - } else { + pm_token_t operator = { 0 }; + if (!pm_symbol_node_label_p(parser, key)) { expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET); operator = parser->previous; } - pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value); + pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); + element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value)); break; } } if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) { - pm_hash_node_elements_append((pm_hash_node_t *) node, element); + pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element); } else { - pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element); + pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element); } // If there's no comma after the element, then we're done. @@ -14153,23 +13713,47 @@ parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *nod return contains_keyword_splat; } +static PRISM_INLINE bool +argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) { + if (pm_symbol_node_label_p(parser, argument)) { + return true; + } + + switch (PM_NODE_TYPE(argument)) { + case PM_CALL_NODE: { + pm_call_node_t *cast = (pm_call_node_t *) argument; + if (cast->opening_loc.length == 0 && cast->arguments != NULL) { + if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) { + return false; + } + if (cast->block != NULL) { + return false; + } + } + break; + } + default: break; + } + return accept1(parser, PM_TOKEN_EQUAL_GREATER); +} + /** * Append an argument to a list of arguments. */ -static inline void +static PRISM_INLINE void parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) { if (arguments->arguments == NULL) { arguments->arguments = pm_arguments_node_create(parser); } - pm_arguments_node_arguments_append(arguments->arguments, argument); + pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument); } /** * Parse a list of arguments. */ static void -parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) { +parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) { pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left; // First we need to check if the next token is one that could be the start @@ -14202,16 +13786,16 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for } pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser); - argument = (pm_node_t *) hash; + argument = UP(hash); pm_static_literals_t hash_keys = { 0 }; - bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1)); + bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1)); parse_arguments_append(parser, arguments, argument); - pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS; - if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT; - pm_node_flag_set((pm_node_t *) arguments->arguments, flags); + pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS; + if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT; + pm_node_flag_set(UP(arguments->arguments), node_flags); pm_static_literals_free(&hash_keys); parsed_bare_hash = true; @@ -14224,12 +13808,12 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for pm_node_t *expression = NULL; if (token_begins_expression_p(parser->current.type)) { - expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1)); + expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1)); } else { pm_parser_scope_forwarding_block_check(parser, &operator); } - argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression); + argument = UP(pm_block_argument_node_create(parser, &operator, expression)); if (parsed_block_argument) { parse_arguments_append(parser, arguments, argument); } else { @@ -14249,18 +13833,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) { pm_parser_scope_forwarding_positionals_check(parser, &operator); - argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL); + argument = UP(pm_splat_node_create(parser, &operator, NULL)); if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); } } else { - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1)); + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1)); if (parsed_bare_hash) { - pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); + pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT); } - argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression); + argument = UP(pm_splat_node_create(parser, &operator, expression)); } parse_arguments_append(parser, arguments, argument); @@ -14275,26 +13859,26 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for // not actually argument forwarding but was instead a // range. pm_token_t operator = parser->previous; - pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); // If we parse a range, we need to validate that we // didn't accidentally violate the nonassoc rules of the // ... operator. if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) { pm_range_node_t *range = (pm_range_node_t *) right; - pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR); + pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR); } - argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right); + argument = UP(pm_range_node_create(parser, NULL, &operator, right)); } else { pm_parser_scope_forwarding_all_check(parser, &parser->previous); if (parsed_first_argument && terminator == PM_TOKEN_EOF) { pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND); } - argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous); + argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous)); parse_arguments_append(parser, arguments, argument); - pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING); + pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING); arguments->has_forwarding = true; parsed_forwarding_arguments = true; break; @@ -14304,22 +13888,20 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for PRISM_FALLTHROUGH default: { if (argument == NULL) { - argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1)); + argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1)); } bool contains_keywords = false; bool contains_keyword_splat = false; - if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { + if (argument_allowed_for_bare_hash(parser, argument)) { if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH); } - pm_token_t operator; + pm_token_t operator = { 0 }; if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { operator = parser->previous; - } else { - operator = not_provided(parser); } pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser); @@ -14330,18 +13912,18 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for pm_hash_key_static_literals_add(parser, &hash_keys, argument); // Finish parsing the one we are part way through. - pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value); + pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); + argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value)); - pm_keyword_hash_node_elements_append(bare_hash, argument); - argument = (pm_node_t *) bare_hash; + pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument); + argument = UP(bare_hash); // Then parse more if we have a comma if (accept1(parser, PM_TOKEN_COMMA) && ( token_begins_expression_p(parser->current.type) || match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL) )) { - contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1)); + contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1)); } pm_static_literals_free(&hash_keys); @@ -14350,10 +13932,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for parse_arguments_append(parser, arguments, argument); - pm_node_flags_t flags = 0; - if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS; - if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT; - pm_node_flag_set((pm_node_t *) arguments->arguments, flags); + pm_node_flags_t node_flags = 0; + if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS; + if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT; + pm_node_flag_set(UP(arguments->arguments), node_flags); break; } @@ -14362,7 +13944,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for parsed_first_argument = true; // If parsing the argument failed, we need to stop parsing arguments. - if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break; + if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break; // If the terminator of these arguments is not EOF, then we have a // specific token we're looking for. In that case we can accept a @@ -14382,6 +13964,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for if (accepted_newline) { pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA); } + + // If this is a command call and an argument takes a block, + // there can be no further arguments. For example, + // `foo(bar 1 do end, 2)` should be rejected. + if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) { + pm_call_node_t *call = (pm_call_node_t *) argument; + if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) { + pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA); + break; + } + } } else { // If there is no comma at the end of the argument list then we're // done parsing arguments and can break out of this loop. @@ -14409,7 +14002,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) { expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER); pm_multi_target_node_t *node = pm_multi_target_node_create(parser); - pm_multi_target_node_opening_set(node, &parser->previous); + pm_multi_target_node_opening_set(parser, node, &parser->previous); do { pm_node_t *param; @@ -14419,33 +14012,33 @@ parse_required_destructured_parameter(pm_parser_t *parser) { // commas, so here we'll assume this is a mistake of the user not // knowing it's not allowed here. if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous); + param = UP(pm_implicit_rest_node_create(parser, &parser->previous)); pm_multi_target_node_targets_append(parser, node, param); pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA); break; } if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { - param = (pm_node_t *) parse_required_destructured_parameter(parser); + param = UP(parse_required_destructured_parameter(parser)); } else if (accept1(parser, PM_TOKEN_USTAR)) { pm_token_t star = parser->previous; pm_node_t *value = NULL; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { pm_token_t name = parser->previous; - value = (pm_node_t *) pm_required_parameter_node_create(parser, &name); + value = UP(pm_required_parameter_node_create(parser, &name)); if (pm_parser_parameter_name_check(parser, &name)) { pm_node_flag_set_repeated_parameter(value); } pm_parser_local_add_token(parser, &name, 1); } - param = (pm_node_t *) pm_splat_node_create(parser, &star, value); + param = UP(pm_splat_node_create(parser, &star, value)); } else { expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER); pm_token_t name = parser->previous; - param = (pm_node_t *) pm_required_parameter_node_create(parser, &name); + param = UP(pm_required_parameter_node_create(parser, &name)); if (pm_parser_parameter_name_check(parser, &name)) { pm_node_flag_set_repeated_parameter(param); } @@ -14457,7 +14050,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER); - pm_multi_target_node_closing_set(node, &parser->previous); + pm_multi_target_node_closing_set(parser, node, &parser->previous); return node; } @@ -14533,6 +14126,43 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord return true; } +static PRISM_INLINE void +parse_parameters_handle_trailing_comma( + pm_parser_t *parser, + pm_parameters_node_t *params, + pm_parameters_order_t order, + bool in_block, + bool allows_trailing_comma +) { + if (!allows_trailing_comma) { + pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA); + return; + } + + if (in_block) { + if (order >= PM_PARAMETERS_ORDER_NAMED) { + // foo do |bar,|; end + pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous)); + + if (params->rest == NULL) { + pm_parameters_node_rest_set(params, param); + } else { + pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI); + pm_parameters_node_posts_append(parser->arena, params, UP(param)); + } + } else { + // foo do |*bar,|; end + pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA); + } + } else { + // https://bugs.ruby-lang.org/issues/19107 + // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end` + if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) { + pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA); + } + } +} + /** * Parse a list of parameters on a method definition. */ @@ -14545,6 +14175,7 @@ parse_parameters( bool allows_forwarding_parameters, bool accepts_blocks_in_defaults, bool in_block, + pm_diagnostic_id_t diag_id_forwarding, uint16_t depth ) { pm_do_loop_stack_push(parser, false); @@ -14558,12 +14189,12 @@ parse_parameters( switch (parser->current.type) { case PM_TOKEN_PARENTHESIS_LEFT: { update_parameter_state(parser, &parser->current, &order); - pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser); + pm_node_t *param = UP(parse_required_destructured_parameter(parser)); if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) { - pm_parameters_node_requireds_append(params, param); + pm_parameters_node_requireds_append(parser->arena, params, param); } else { - pm_parameters_node_posts_append(params, param); + pm_parameters_node_posts_append(parser->arena, params, param); } break; } @@ -14573,34 +14204,40 @@ parse_parameters( parser_lex(parser); pm_token_t operator = parser->previous; - pm_token_t name; + pm_node_t *param; - bool repeated = false; - if (accept1(parser, PM_TOKEN_IDENTIFIER)) { - name = parser->previous; - repeated = pm_parser_parameter_name_check(parser, &name); - pm_parser_local_add_token(parser, &name, 1); + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) { + param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous); } else { - name = not_provided(parser); - parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK; - } + pm_token_t name = {0}; - pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator); - if (repeated) { - pm_node_flag_set_repeated_parameter((pm_node_t *)param); + bool repeated = false; + if (accept1(parser, PM_TOKEN_IDENTIFIER)) { + name = parser->previous; + repeated = pm_parser_parameter_name_check(parser, &name); + pm_parser_local_add_token(parser, &name, 1); + } else { + parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK; + } + + param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator); + if (repeated) { + pm_node_flag_set_repeated_parameter(param); + } } + if (params->block == NULL) { pm_parameters_node_block_set(params, param); } else { - pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI); - pm_parameters_node_posts_append(params, (pm_node_t *) param); + pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI); + pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param))); } break; } case PM_TOKEN_UDOT_DOT_DOT: { if (!allows_forwarding_parameters) { - pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES); + pm_parser_err_current(parser, diag_id_forwarding); } bool succeeded = update_parameter_state(parser, &parser->current, &order); @@ -14613,12 +14250,12 @@ parse_parameters( // If we already have a keyword rest parameter, then we replace it with the // forwarding parameter and move the keyword rest parameter to the posts list. pm_node_t *keyword_rest = params->keyword_rest; - pm_parameters_node_posts_append(params, keyword_rest); + pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest))); if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD); params->keyword_rest = NULL; } - pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param); + pm_parameters_node_keyword_rest_set(params, UP(param)); break; } case PM_TOKEN_CLASS_VARIABLE: @@ -14663,24 +14300,24 @@ parse_parameters( parser_lex(parser); pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name); - uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; + uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true); - pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1)); + pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1)); if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser); pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value); if (repeated) { - pm_node_flag_set_repeated_parameter((pm_node_t *) param); + pm_node_flag_set_repeated_parameter(UP(param)); } - pm_parameters_node_optionals_append(params, param); + pm_parameters_node_optionals_append(parser->arena, params, param); // If the value of the parameter increased the number of // reads of that parameter, then we need to warn that we // have a circular definition. - if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR); + if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR); } context_pop(parser); @@ -14695,15 +14332,15 @@ parse_parameters( } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) { pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name); if (repeated) { - pm_node_flag_set_repeated_parameter((pm_node_t *)param); + pm_node_flag_set_repeated_parameter(UP(param)); } - pm_parameters_node_requireds_append(params, (pm_node_t *) param); + pm_parameters_node_requireds_append(parser->arena, params, UP(param)); } else { pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name); if (repeated) { - pm_node_flag_set_repeated_parameter((pm_node_t *)param); + pm_node_flag_set_repeated_parameter(UP(param)); } - pm_parameters_node_posts_append(params, (pm_node_t *) param); + pm_parameters_node_posts_append(parser->arena, params, UP(param)); } break; @@ -14720,9 +14357,9 @@ parse_parameters( local.end -= 1; if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) { - pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT); + pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT); } else if (local.end[-1] == '!' || local.end[-1] == '?') { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE); } bool repeated = pm_parser_parameter_name_check(parser, &local); @@ -14734,12 +14371,12 @@ parse_parameters( case PM_TOKEN_PIPE: { context_pop(parser); - pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name); + pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name)); if (repeated) { pm_node_flag_set_repeated_parameter(param); } - pm_parameters_node_keywords_append(params, param); + pm_parameters_node_keywords_append(parser->arena, params, param); break; } case PM_TOKEN_SEMICOLON: @@ -14751,12 +14388,12 @@ parse_parameters( break; } - pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name); + pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name)); if (repeated) { pm_node_flag_set_repeated_parameter(param); } - pm_parameters_node_keywords_append(params, param); + pm_parameters_node_keywords_append(parser->arena, params, param); break; } default: { @@ -14764,20 +14401,20 @@ parse_parameters( if (token_begins_expression_p(parser->current.type)) { pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local); - uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; + uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true); - pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1)); + pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1)); if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser); - if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR); + if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR); } - param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value); + param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value)); } else { - param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name); + param = UP(pm_required_keyword_parameter_node_create(parser, &name)); } if (repeated) { @@ -14785,7 +14422,7 @@ parse_parameters( } context_pop(parser); - pm_parameters_node_keywords_append(params, param); + pm_parameters_node_keywords_append(parser->arena, params, param); // If parsing the value of the parameter resulted in error recovery, // then we can put a missing node in its place and stop parsing the @@ -14806,7 +14443,7 @@ parse_parameters( parser_lex(parser); pm_token_t operator = parser->previous; - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -14814,11 +14451,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, &name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS; } - pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name); + pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name))); if (repeated) { pm_node_flag_set_repeated_parameter(param); } @@ -14827,7 +14463,7 @@ parse_parameters( pm_parameters_node_rest_set(params, param); } else { pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI); - pm_parameters_node_posts_append(params, param); + pm_parameters_node_posts_append(parser->arena, params, param); } break; @@ -14846,9 +14482,9 @@ parse_parameters( pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW); } - param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous); + param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous)); } else { - pm_token_t name; + pm_token_t name = { 0 }; bool repeated = false; if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -14856,11 +14492,10 @@ parse_parameters( repeated = pm_parser_parameter_name_check(parser, &name); pm_parser_local_add_token(parser, &name, 1); } else { - name = not_provided(parser); parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS; } - param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name); + param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name))); if (repeated) { pm_node_flag_set_repeated_parameter(param); } @@ -14870,27 +14505,14 @@ parse_parameters( pm_parameters_node_keyword_rest_set(params, param); } else { pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI); - pm_parameters_node_posts_append(params, param); + pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param))); } break; } default: if (parser->previous.type == PM_TOKEN_COMMA) { - if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) { - // If we get here, then we have a trailing comma in a - // block parameter list. - pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous); - - if (params->rest == NULL) { - pm_parameters_node_rest_set(params, param); - } else { - pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI); - pm_parameters_node_posts_append(params, (pm_node_t *) param); - } - } else { - pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA); - } + parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma); } parsing = false; @@ -14922,8 +14544,7 @@ parse_parameters( pm_do_loop_stack_pop(parser); // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`. - if (params->base.location.start == params->base.location.end) { - pm_node_destroy(parser, (pm_node_t *) params); + if (PM_NODE_START(params) == PM_NODE_END(params)) { return NULL; } @@ -14940,13 +14561,13 @@ token_newline_index(const pm_parser_t *parser) { // This is the common case. In this case we can look at the previously // recorded newline in the newline list and subtract from the current // offset. - return parser->newline_list.size - 1; + return parser->line_offsets.size - 1; } else { // This is unlikely. This is the case that we have already parsed the // start of a heredoc, so we cannot rely on looking at the previous // offset of the newline list, and instead must go through the whole // process of a binary search for the line number. - return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0); + return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0); } } @@ -14956,7 +14577,7 @@ token_newline_index(const pm_parser_t *parser) { */ static int64_t token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) { - const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index]; + const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index]; const uint8_t *end = token->start; // Skip over the BOM if it is present. @@ -15020,8 +14641,8 @@ parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_ind // Otherwise, add a warning. PM_PARSER_WARN_FORMAT( parser, - closing_token->start, - closing_token->end, + PM_TOKEN_START(parser, closing_token), + PM_TOKEN_LENGTH(closing_token), PM_WARN_INDENTATION_MISMATCH, (int) (closing_token->end - closing_token->start), (const char *) closing_token->start, @@ -15045,7 +14666,7 @@ typedef enum { * Parse any number of rescue clauses. This will form a linked list of if * nodes pointing to each other from the top. */ -static inline void +static PRISM_INLINE void parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) { pm_rescue_node_t *current = NULL; @@ -15061,9 +14682,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // we're going to have an empty list of exceptions to rescue (which // implies StandardError). parser_lex(parser); - pm_rescue_node_operator_set(rescue, &parser->previous); + pm_rescue_node_operator_set(parser, rescue, &parser->previous); - pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); + pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); pm_rescue_node_reference_set(rescue, reference); @@ -15072,8 +14693,8 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ case PM_TOKEN_NEWLINE: case PM_TOKEN_SEMICOLON: case PM_TOKEN_KEYWORD_THEN: - // Here we have a terminator for the rescue keyword, in which case we're - // going to just continue on. + // Here we have a terminator for the rescue keyword, in which + // case we're going to just continue on. break; default: { if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) { @@ -15082,7 +14703,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ do { pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1)); - pm_rescue_node_exceptions_append(rescue, expression); + pm_rescue_node_exceptions_append(parser->arena, rescue, expression); // If we hit a newline, then this is the end of the rescue expression. We // can continue on to parse the statements. @@ -15091,9 +14712,9 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // If we hit a `=>` then we're going to parse the exception variable. Once // we've done that, we'll break out of the loop and parse the statements. if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) { - pm_rescue_node_operator_set(rescue, &parser->previous); + pm_rescue_node_operator_set(parser, rescue, &parser->previous); - pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); + pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1)); reference = parse_target(parser, reference, false, false); pm_rescue_node_reference_set(rescue, reference); @@ -15105,9 +14726,12 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ } if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - accept1(parser, PM_TOKEN_KEYWORD_THEN); + if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { + rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous); + } } else { expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM); + rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous); } if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) { @@ -15145,11 +14769,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // since we won't know the end until we've found all subsequent // clauses. This sets the end location on all rescues once we know it. if (current != NULL) { - const uint8_t *end_to_set = current->base.location.end; pm_rescue_node_t *clause = parent_node->rescue_clause; while (clause != NULL) { - clause->base.location.end = end_to_set; + PM_NODE_LENGTH_SET_NODE(clause, current); clause = clause->subsequent; } } @@ -15192,7 +14815,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ // If we don't have a `current` rescue node, then this is a dangling // else, and it's an error. - if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE); + if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE); } if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) { @@ -15230,10 +14853,10 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ if (match1(parser, PM_TOKEN_KEYWORD_END)) { if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false); - pm_begin_node_end_keyword_set(parent_node, &parser->current); + pm_begin_node_end_keyword_set(parser, parent_node, &parser->current); } else { - pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - pm_begin_node_end_keyword_set(parent_node, &end_keyword); + pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end }; + pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword); } } @@ -15243,11 +14866,11 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_ */ static pm_begin_node_t * parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) { - pm_token_t begin_keyword = not_provided(parser); - pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements); - + pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements); parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1)); - node->base.location.start = start; + + node->base.location.start = U32(start - parser->start); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current); return node; } @@ -15266,6 +14889,9 @@ parse_block_parameters( ) { pm_parameters_node_t *parameters = NULL; if (!match1(parser, PM_TOKEN_SEMICOLON)) { + if (!is_lambda_literal) { + context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS); + } parameters = parse_parameters( parser, is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX, @@ -15274,12 +14900,16 @@ parse_block_parameters( false, accepts_blocks_in_defaults, true, + is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK, (uint16_t) (depth + 1) ); + if (!is_lambda_literal) { + context_pop(parser); + } } pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening); - if ((opening->type != PM_TOKEN_NOT_PROVIDED)) { + if (opening != NULL) { accept1(parser, PM_TOKEN_NEWLINE); if (accept1(parser, PM_TOKEN_SEMICOLON)) { @@ -15310,9 +14940,9 @@ parse_block_parameters( pm_parser_local_add_token(parser, &parser->previous, 1); pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous); - if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local); + if (repeated) pm_node_flag_set_repeated_parameter(UP(local)); - pm_block_parameters_node_append_local(block_parameters, local); + pm_block_parameters_node_append_local(parser->arena, block_parameters, local); } while (accept1(parser, PM_TOKEN_COMMA)); } } @@ -15392,8 +15022,8 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_ pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK); } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) { pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK); - } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0')); + } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0')); } else { assert(false && "unreachable"); } @@ -15412,13 +15042,11 @@ parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_ for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) { scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER; } - - const pm_location_t location = { .start = opening->start, .end = closing->end }; - return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter); + return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter)); } if (it_parameter) { - return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing); + return UP(pm_it_parameters_node_create(parser, opening, closing)); } return NULL; @@ -15450,7 +15078,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) { expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM); } - pm_block_parameters_node_closing_set(block_parameters, &parser->previous); + pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous); } accept1(parser, PM_TOKEN_NEWLINE); @@ -15458,30 +15086,30 @@ parse_block(pm_parser_t *parser, uint16_t depth) { if (opening.type == PM_TOKEN_BRACE_LEFT) { if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) { - statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)); + statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1))); } - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening); } else { if (!match1(parser, PM_TOKEN_KEYWORD_END)) { if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) { pm_accepts_block_stack_push(parser, true); - statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)); + statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1))); pm_accepts_block_stack_pop(parser); } if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)); + statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1))); } } - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening); } pm_constant_id_list_t locals; pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser)); - pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous); + pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous); pm_parser_scope_pop(parser); pm_accepts_block_stack_pop(parser); @@ -15495,42 +15123,54 @@ parse_block(pm_parser_t *parser, uint16_t depth) { * arguments, or blocks). */ static bool -parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) { +parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) { + /* Fast path: if the current token can't begin an expression and isn't + * a parenthesis, block opener, or splat/block-pass operator, there are + * no arguments to parse. */ + if ( + !token_begins_expression_p(parser->current.type) && + !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND) + ) { + return false; + } + bool found = false; + bool parsed_command_args = false; if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { found |= true; - arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->opening_loc = TOK2LOC(parser, &parser->previous); if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->closing_loc = TOK2LOC(parser, &parser->previous); } else { pm_accepts_block_stack_push(parser, true); - parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1)); + parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1)); if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } pm_accepts_block_stack_pop(parser); - arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments->closing_loc = TOK2LOC(parser, &parser->previous); } - } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) { + } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) { found |= true; + parsed_command_args = true; pm_accepts_block_stack_push(parser, false); // If we get here, then the subsequent token cannot be used as an infix // operator. In this case we assume the subsequent token is part of an // argument to this method call. - parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1)); + parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1)); // If we have done with the arguments and still not consumed the comma, // then we have a trailing comma where we need to check whether it is // allowed or not. if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type)); } pm_accepts_block_stack_pop(parser); @@ -15549,21 +15189,24 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) { found |= true; block = parse_block(parser, (uint16_t) (depth + 1)); + } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) { + found |= true; + block = parse_block(parser, (uint16_t) (depth + 1)); } if (block != NULL) { if (arguments->block == NULL && !arguments->has_forwarding) { - arguments->block = (pm_node_t *) block; + arguments->block = UP(block); } else { - pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI); + pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI); if (arguments->block != NULL) { if (arguments->arguments == NULL) { arguments->arguments = pm_arguments_node_create(parser); } - pm_arguments_node_arguments_append(arguments->arguments, arguments->block); + pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block); } - arguments->block = (pm_node_t *) block; + arguments->block = UP(block); } } } @@ -15631,6 +15274,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) { case PM_CONTEXT_BLOCK_ENSURE: case PM_CONTEXT_BLOCK_KEYWORDS: case PM_CONTEXT_BLOCK_RESCUE: + case PM_CONTEXT_BLOCK_PARAMETERS: case PM_CONTEXT_DEF_ELSE: case PM_CONTEXT_DEF_ENSURE: case PM_CONTEXT_DEF_PARAMS: @@ -15650,7 +15294,7 @@ parse_return(pm_parser_t *parser, pm_node_t *node) { break; } } - if (in_sclass) { + if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) { pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID); } } @@ -15667,6 +15311,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) { case PM_CONTEXT_BLOCK_KEYWORDS: case PM_CONTEXT_BLOCK_ELSE: case PM_CONTEXT_BLOCK_ENSURE: + case PM_CONTEXT_BLOCK_PARAMETERS: case PM_CONTEXT_BLOCK_RESCUE: case PM_CONTEXT_DEFINED: case PM_CONTEXT_FOR: @@ -15676,12 +15321,19 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) { case PM_CONTEXT_LAMBDA_ENSURE: case PM_CONTEXT_LAMBDA_RESCUE: case PM_CONTEXT_LOOP_PREDICATE: - case PM_CONTEXT_POSTEXE: case PM_CONTEXT_UNTIL: case PM_CONTEXT_WHILE: // These are the good cases. We're allowed to have a block exit // in these contexts. return; + case PM_CONTEXT_POSTEXE: + // https://bugs.ruby-lang.org/issues/20409 + if (context_node->context == PM_CONTEXT_POSTEXE) { + if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) { + return; + } + } + PRISM_FALLTHROUGH case PM_CONTEXT_DEF: case PM_CONTEXT_DEF_PARAMS: case PM_CONTEXT_DEF_ELSE: @@ -15703,7 +15355,7 @@ parse_block_exit(pm_parser_t *parser, pm_node_t *node) { // block exit to the list of exits for the expression, and // the node parsing will handle validating it instead. assert(parser->current_block_exits != NULL); - pm_node_list_append(parser->current_block_exits, node); + pm_node_list_append(parser->arena, parser->current_block_exits, node); return; case PM_CONTEXT_BEGIN_ELSE: case PM_CONTEXT_BEGIN_ENSURE: @@ -15794,7 +15446,7 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) { // However, they could still become valid in a higher level context if // there is another list above this one. In this case we'll push all of // the block exits up to the previous list. - pm_node_list_concat(previous_block_exits, parser->current_block_exits); + pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits); parser->current_block_exits = previous_block_exits; } else { // If we did not match a trailing while/until and this was the last @@ -15804,11 +15456,11 @@ pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) { } } -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) { context_push(parser, PM_CONTEXT_PREDICATE); pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE; - pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1)); + pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1)); // Predicates are closed by a term, a "then", or a term and then a "then". bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); @@ -15826,15 +15478,15 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex return predicate; } -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) { pm_node_list_t current_block_exits = { 0 }; pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); pm_token_t keyword = parser->previous; - pm_token_t then_keyword = not_provided(parser); + pm_token_t then_keyword = { 0 }; - pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1)); + pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_COMPOSITION, context, &then_keyword, (uint16_t) (depth + 1)); pm_statements_node_t *statements = NULL; if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { @@ -15844,15 +15496,14 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); } - pm_token_t end_keyword = not_provided(parser); pm_node_t *parent = NULL; switch (context) { case PM_CONTEXT_IF: - parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword); + parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL)); break; case PM_CONTEXT_UNLESS: - parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements); + parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements)); break; default: assert(false && "unreachable"); @@ -15866,21 +15517,21 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl if (context == PM_CONTEXT_IF) { while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) { if (parser_end_of_line_p(parser)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL); } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); pm_token_t elsif_keyword = parser->current; parser_lex(parser); - pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1)); + pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_COMPOSITION, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1)); pm_accepts_block_stack_push(parser, true); pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword); + pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL)); ((pm_if_node_t *) current)->subsequent = elsif; current = elsif; } @@ -15899,13 +15550,13 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword); pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous); switch (context) { case PM_CONTEXT_IF: - ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node; + ((pm_if_node_t *) current)->subsequent = UP(else_node); break; case PM_CONTEXT_UNLESS: ((pm_unless_node_t *) parent)->else_clause = else_node; @@ -15916,7 +15567,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl } } else { parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword); } // Set the appropriate end location for all of the nodes in the subtree. @@ -15928,12 +15579,12 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl while (recursing) { switch (PM_NODE_TYPE(current)) { case PM_IF_NODE: - pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous); + pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous); current = ((pm_if_node_t *) current)->subsequent; recursing = current != NULL; break; case PM_ELSE_NODE: - pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous); + pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous); recursing = false; break; default: { @@ -15945,7 +15596,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl break; } case PM_CONTEXT_UNLESS: - pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous); + pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous); break; default: assert(false && "unreachable"); @@ -15953,8 +15604,6 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl } pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - return parent; } @@ -15965,7 +15614,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl #define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \ case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \ case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \ - case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \ + case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_BLOCK: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \ case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \ case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \ case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \ @@ -16028,7 +15677,7 @@ PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int * If the encoding was explicitly set through the lexing process, then we need * to potentially mark the string's flags to indicate how to encode it. */ -static inline pm_node_flags_t +static PRISM_INLINE pm_node_flags_t parse_unescaped_encoding(const pm_parser_t *parser) { if (parser->explicit_encoding != NULL) { if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) { @@ -16060,10 +15709,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { // "aaa #{bbb} #@ccc ddd" // ^^^^ ^ ^^^^ case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); + pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); pm_node_flag_set(node, parse_unescaped_encoding(parser)); parser_lex(parser); @@ -16090,7 +15736,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { pm_token_t opening = parser->previous; pm_statements_node_t *statements = NULL; - if (!match1(parser, PM_TOKEN_EMBEXPR_END)) { + if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { pm_accepts_block_stack_push(parser, true); statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); @@ -16098,9 +15744,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { parser->brace_nesting = brace_nesting; lex_state_set(parser, state); - expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END); - pm_token_t closing = parser->previous; // If this set of embedded statements only contains a single // statement, then Ruby does not consider it as a possible statement @@ -16109,7 +15753,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE); } - return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing); + return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous)); } // Here the lexer has returned the beginning of an embedded variable. @@ -16134,42 +15778,42 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) { // create a global variable read node. case PM_TOKEN_BACK_REFERENCE: parser_lex(parser); - variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous); + variable = UP(pm_back_reference_read_node_create(parser, &parser->previous)); break; // In this case an nth reference is being interpolated. We'll // create a global variable read node. case PM_TOKEN_NUMBERED_REFERENCE: parser_lex(parser); - variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous); + variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous)); break; // In this case a global variable is being interpolated. We'll // create a global variable read node. case PM_TOKEN_GLOBAL_VARIABLE: parser_lex(parser); - variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous); + variable = UP(pm_global_variable_read_node_create(parser, &parser->previous)); break; // In this case an instance variable is being interpolated. // We'll create an instance variable read node. case PM_TOKEN_INSTANCE_VARIABLE: parser_lex(parser); - variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous); + variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous)); break; // In this case a class variable is being interpolated. We'll // create a class variable read node. case PM_TOKEN_CLASS_VARIABLE: parser_lex(parser); - variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous); + variable = UP(pm_class_variable_read_node_create(parser, &parser->previous)); break; // We can hit here if we got an invalid token. In that case // we'll not attempt to lex this token and instead just return a // missing node. default: expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID); - variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); break; } - return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable); + return UP(pm_embedded_variable_node_create(parser, &operator, variable)); } default: parser_lex(parser); @@ -16197,18 +15841,16 @@ parse_operator_symbol_name(const pm_token_t *name) { static pm_node_t * parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) { - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL); const uint8_t *end = parse_operator_symbol_name(&parser->current); if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); parser_lex(parser); pm_string_shared_init(&symbol->unescaped, parser->previous.start, end); - pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING); + pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING); - return (pm_node_t *) symbol; + return UP(symbol); } /** @@ -16242,13 +15884,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s break; } - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); - pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); + pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); - return (pm_node_t *) symbol; + return UP(symbol); } if (lex_mode->as.string.interpolation) { @@ -16256,10 +15896,13 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s if (match1(parser, PM_TOKEN_STRING_END)) { if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); parser_lex(parser); + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->previous.start, + .end = parser->previous.start + }; - pm_token_t content = not_provided(parser); - pm_token_t closing = parser->previous; - return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing); + return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous)); } // Now we can parse the first part of the symbol. @@ -16271,15 +15914,15 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state); expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED); - return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous); + return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous)); } pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening); - if (part) pm_interpolated_symbol_node_append(symbol, part); + if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part); while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_interpolated_symbol_node_append(symbol, part); + pm_interpolated_symbol_node_append(parser->arena, symbol, part); } } @@ -16290,8 +15933,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED); } - pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous); - return (pm_node_t *) symbol; + pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous); + return UP(symbol); } pm_token_t content; @@ -16313,13 +15956,11 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s // interpolated string node, so that's what we'll do here. if (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening); - pm_token_t bounds = not_provided(parser); - - pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped); - pm_interpolated_symbol_node_append(symbol, part); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped)); + pm_interpolated_symbol_node_append(parser->arena, symbol, part); - part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string); - pm_interpolated_symbol_node_append(symbol, part); + part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string)); + pm_interpolated_symbol_node_append(parser->arena, symbol, part); if (next_state != PM_LEX_STATE_NONE) { lex_state_set(parser, next_state); @@ -16328,8 +15969,8 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s parser_lex(parser); expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC); - pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous); - return (pm_node_t *) symbol; + pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous); + return UP(symbol); } } else { content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end }; @@ -16346,34 +15987,29 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC); } - return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)); + return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false))); } /** * Parse an argument to undef which can either be a bare word, a symbol, a * constant, or an interpolated symbol. */ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_undef_argument(pm_parser_t *parser, uint16_t depth) { switch (parser->current.type) { - case PM_CASE_OPERATOR: { - const pm_token_t opening = not_provided(parser); - return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE); - } + case PM_CASE_OPERATOR: + return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE); case PM_CASE_KEYWORD: case PM_TOKEN_CONSTANT: case PM_TOKEN_IDENTIFIER: case PM_TOKEN_METHOD_NAME: { parser_lex(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); - pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); + pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); - return (pm_node_t *) symbol; + return UP(symbol); } case PM_TOKEN_SYMBOL_BEGIN: { pm_lex_mode_t lex_mode = *parser->lex_modes.current; @@ -16383,7 +16019,7 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { } default: pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT); - return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } @@ -16393,13 +16029,11 @@ parse_undef_argument(pm_parser_t *parser, uint16_t depth) { * we need to set the lex state to PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM * between the first and second arguments. */ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { switch (parser->current.type) { - case PM_CASE_OPERATOR: { - const pm_token_t opening = not_provided(parser); - return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE); - } + case PM_CASE_OPERATOR: + return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE); case PM_CASE_KEYWORD: case PM_TOKEN_CONSTANT: case PM_TOKEN_IDENTIFIER: @@ -16407,14 +16041,11 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM); parser_lex(parser); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing); - + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL); pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end); - pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); + pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false)); - return (pm_node_t *) symbol; + return UP(symbol); } case PM_TOKEN_SYMBOL_BEGIN: { pm_lex_mode_t lex_mode = *parser->lex_modes.current; @@ -16424,16 +16055,16 @@ parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) { } case PM_TOKEN_BACK_REFERENCE: parser_lex(parser); - return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous); + return UP(pm_back_reference_read_node_create(parser, &parser->previous)); case PM_TOKEN_NUMBERED_REFERENCE: parser_lex(parser); - return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous); + return UP(pm_numbered_reference_read_node_create(parser, &parser->previous)); case PM_TOKEN_GLOBAL_VARIABLE: parser_lex(parser); - return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous); + return UP(pm_global_variable_read_node_create(parser, &parser->previous)); default: pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT); - return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } } @@ -16445,10 +16076,10 @@ static pm_node_t * parse_variable(pm_parser_t *parser) { pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous); int depth; - bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end); + bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) { - return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false); + return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false)); } pm_scope_t *current_scope = parser->current_scope; @@ -16467,13 +16098,13 @@ parse_variable(pm_parser_t *parser) { parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND; } - pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false); - pm_node_list_append(¤t_scope->implicit_parameters, node); + pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false)); + pm_node_list_append(parser->arena, ¤t_scope->implicit_parameters, node); return node; - } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) { - pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous); - pm_node_list_append(¤t_scope->implicit_parameters, node); + } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) { + pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous)); + pm_node_list_append(parser->arena, ¤t_scope->implicit_parameters, node); return node; } @@ -16496,9 +16127,9 @@ parse_variable_call(pm_parser_t *parser) { } pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous); - pm_node_flag_set((pm_node_t *)node, flags); + pm_node_flag_set(UP(node), flags); - return (pm_node_t *) node; + return UP(node); } /** @@ -16506,7 +16137,7 @@ parse_variable_call(pm_parser_t *parser) { * parser. If it does not match a valid method definition name, then a missing * token is returned. */ -static inline pm_token_t +static PRISM_INLINE pm_token_t parse_method_definition_name(pm_parser_t *parser) { switch (parser->current.type) { case PM_CASE_KEYWORD: @@ -16515,7 +16146,7 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return parser->previous; case PM_TOKEN_IDENTIFIER: - pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end); + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)); parser_lex(parser); return parser->previous; case PM_CASE_OPERATOR: @@ -16523,22 +16154,31 @@ parse_method_definition_name(pm_parser_t *parser) { parser_lex(parser); return parser->previous; default: - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type)); - return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end }; + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type)); + return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end }; } } static void -parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) { - // Get a reference to the string struct that is being held by the string - // node. This is the value we're going to actually manipulate. - pm_string_ensure_owned(string); +parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) { + // Make a writable copy in the arena if the string isn't already writable. + // We keep a mutable pointer to the arena memory so we can memmove into it + // below without casting away const from the string's source field. + uint8_t *writable; + + if (string->type != PM_STRING_OWNED) { + size_t length = pm_string_length(string); + writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t)); + pm_string_constant_init(string, (const char *) writable, length); + } else { + writable = (uint8_t *) string->source; + } // Now get the bounds of the existing string. We'll use this as a // destination to move bytes into. We'll also use it for bounds checking // since we don't require that these strings be null terminated. size_t dest_length = pm_string_length(string); - const uint8_t *source_cursor = (uint8_t *) string->source; + const uint8_t *source_cursor = writable; const uint8_t *source_end = source_cursor + dest_length; // We're going to move bytes backward in the string when we get leading @@ -16562,11 +16202,24 @@ parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) { dest_length--; } - memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor)); + memmove(writable, source_cursor, (size_t) (source_end - source_cursor)); string->length = dest_length; } /** + * If we end up trimming all of the whitespace from a node and it isn't + * part of a line continuation, then we'll drop it from the list entirely. + */ +static PRISM_INLINE bool +heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) { + if (string_node->unescaped.length == 0) { + const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc); + return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL; + } + return false; +} + +/** * Take a heredoc node that is indented by a ~ and trim the leading whitespace. */ static void @@ -16576,8 +16229,7 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w bool dedent_next = true; // Iterate over all nodes, and trim whitespace accordingly. We're going to - // keep around two indices: a read and a write. If we end up trimming all of - // the whitespace from a node, then we'll drop it from the list entirely. + // keep around two indices: a read and a write. size_t write_index = 0; pm_node_t *node; @@ -16593,11 +16245,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w pm_string_node_t *string_node = ((pm_string_node_t *) node); if (dedent_next) { - parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace); + parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace); } - if (string_node->unescaped.length == 0) { - pm_node_destroy(parser, node); + if (heredoc_dedent_discard_string_node(parser, string_node)) { } else { nodes->nodes[write_index++] = node; } @@ -16620,7 +16271,7 @@ parse_strings_empty_content(const uint8_t *location) { /** * Parse a set of strings that could be concatenated together. */ -static inline pm_node_t * +static PRISM_INLINE pm_node_t * parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) { assert(parser->current.type == PM_TOKEN_STRING_BEGIN); bool concating = false; @@ -16647,16 +16298,14 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous); pm_string_shared_init(&string->unescaped, content.start, content.end); - node = (pm_node_t *) string; + node = UP(string); } else if (accept1(parser, PM_TOKEN_LABEL_END)) { // If we get here, then we have an end of a label immediately // after a start. In that case we'll create an empty symbol // node. - pm_token_t content = parse_strings_empty_content(parser->previous.start); - pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous); - - pm_string_shared_init(&symbol->unescaped, content.start, content.end); - node = (pm_node_t *) symbol; + pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous); + pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start); + node = UP(symbol); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); } else if (!lex_interpolation) { @@ -16667,7 +16316,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 if (match1(parser, PM_TOKEN_EOF)) { unescaped = PM_STRING_EMPTY; - content = not_provided(parser); + content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start }; } else { unescaped = parser->current_string; expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT); @@ -16687,34 +16336,30 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // be able to contain all of the parts. if (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_node_list_t parts = { 0 }; - - pm_token_t delimiters = not_provided(parser); - pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped); - pm_node_list_append(&parts, part); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped)); + pm_node_list_append(parser->arena, &parts, part); do { - part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters); - pm_node_list_append(&parts, part); + part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + pm_node_list_append(parser->arena, &parts, part); parser_lex(parser); } while (match1(parser, PM_TOKEN_STRING_CONTENT)); expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF); - node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); - - pm_node_list_free(&parts); + node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous)); } else if (accept1(parser, PM_TOKEN_LABEL_END)) { - node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)); + node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true))); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); } else if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF); - node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped); + node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped)); } else if (accept1(parser, PM_TOKEN_STRING_END)) { - node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped); + node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } else { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; - node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped); + parser->previous.type = 0; + node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped)); } } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) { // In this case we've hit string content so we know the string @@ -16726,7 +16371,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 parser_lex(parser); if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { - node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped); + node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped)); pm_node_flag_set(node, parse_unescaped_encoding(parser)); // Kind of odd behavior, but basically if we have an @@ -16736,43 +16381,38 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 if (!accept1(parser, PM_TOKEN_STRING_END)) { const uint8_t *location = parser->previous.end; if (location > parser->start && location[-1] == '\n') location--; - pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF); + pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } } else if (accept1(parser, PM_TOKEN_LABEL_END)) { - node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)); + node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true))); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); } else { // If we get here, then we have interpolation so we'll need // to create a string or symbol node with interpolation. pm_node_list_t parts = { 0 }; - pm_token_t string_opening = not_provided(parser); - pm_token_t string_closing = not_provided(parser); - - pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); pm_node_flag_set(part, parse_unescaped_encoding(parser)); - pm_node_list_append(&parts, part); + pm_node_list_append(parser->arena, &parts, part); while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) { if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_node_list_append(&parts, part); + pm_node_list_append(parser->arena, &parts, part); } } if (accept1(parser, PM_TOKEN_LABEL_END)) { - node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); + node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous)); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); } else if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM); - node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current); + node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current)); } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM); - node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous)); } - - pm_node_list_free(&parts); } } else { // If we get here, then the first part of the string is not plain @@ -16783,22 +16423,20 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) { if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_node_list_append(&parts, part); + pm_node_list_append(parser->arena, &parts, part); } } if (accept1(parser, PM_TOKEN_LABEL_END)) { - node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); + node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous)); if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL); } else if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM); - node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current); + node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current)); } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM); - node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous)); } - - pm_node_list_free(&parts); } if (current == NULL) { @@ -16823,15 +16461,17 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 // If we haven't already created our container for concatenation, // we'll do that now. if (!concating) { - concating = true; - pm_token_t bounds = not_provided(parser); + if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION); + } - pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds); - pm_interpolated_string_node_append(container, current); - current = (pm_node_t *) container; + concating = true; + pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, container, current); + current = UP(container); } - pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node); + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node); } } @@ -16853,12 +16493,12 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag static void parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) { // Skip this capture if it starts with an underscore. - if (*location->start == '_') return; + if (peek_at(parser, parser->start + location->start) == '_') return; if (pm_constant_id_list_includes(captures, capture)) { - pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); + pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE); } else { - pm_constant_id_list_append(captures, capture); + pm_constant_id_list_append(parser->arena, captures, capture); } } @@ -16872,7 +16512,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures while (accept1(parser, PM_TOKEN_COLON_COLON)) { pm_token_t delimiter = parser->previous; expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); - node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous); + node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous)); } // If there is a [ or ( that follows, then this is part of a larger pattern @@ -16893,7 +16533,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET); + expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening); } closing = parser->previous; @@ -16905,7 +16545,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); + expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening); } closing = parser->previous; @@ -16914,7 +16554,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures if (!inner) { // If there was no inner pattern, then we have something like Foo() or // Foo[]. In that case we'll create an array pattern with no requireds. - return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing); + return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing)); } // Now that we have the inner pattern, check to see if it's an array, find, @@ -16925,15 +16565,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_ARRAY_PATTERN_NODE: { pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); - return (pm_node_t *) pattern_node; + return UP(pattern_node); } break; @@ -16941,15 +16581,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_FIND_PATTERN_NODE: { pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); - return (pm_node_t *) pattern_node; + return UP(pattern_node); } break; @@ -16957,15 +16597,15 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures case PM_HASH_PATTERN_NODE: { pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner; - if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = node->location.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_NODE(pattern_node, node); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); pattern_node->constant = node; - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); - return (pm_node_t *) pattern_node; + return UP(pattern_node); } break; @@ -16978,8 +16618,8 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures // attaching its constant. In this case we'll create an array pattern and // attach our constant to it. pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing); - pm_array_pattern_node_requireds_append(pattern_node, inner); - return (pm_node_t *) pattern_node; + pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner); + return UP(pattern_node); } /** @@ -16995,21 +16635,20 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { // will check for that here. If they do, then we'll add it to the local // table since this pattern will cause it to become a local variable. if (accept1(parser, PM_TOKEN_IDENTIFIER)) { - pm_token_t identifier = parser->previous; - pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier); + pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous); int depth; if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) { - pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0); + pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier)); - name = (pm_node_t *) pm_local_variable_target_node_create( + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); + name = UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&identifier), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) - ); + )); } // Finally we can return the created node. @@ -17028,7 +16667,7 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) pm_node_t *value = NULL; if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) { - return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous); + return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous)); } if (accept1(parser, PM_TOKEN_IDENTIFIER)) { @@ -17039,16 +16678,16 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); - value = (pm_node_t *) pm_local_variable_target_node_create( + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); + value = UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) - ); + )); } - return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator); + return UP(pm_assoc_splat_node_create(parser, value, &operator)); } /** @@ -17085,22 +16724,24 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u static pm_node_t * parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) { const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc; + const uint8_t *start = parser->start + PM_LOCATION_START(value_loc); + const uint8_t *end = parser->start + PM_LOCATION_END(value_loc); - pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end); + pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end); int depth = -1; - if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) { + if (pm_slice_is_valid_local(parser, start, end)) { depth = pm_parser_local_depth_constant_id(parser, constant_id); } else { - pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS); + pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS); - if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) { - PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start); + if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) { + PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start); } } if (depth == -1) { - pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0); + pm_parser_local_add(parser, constant_id, start, end, 0); } parse_pattern_capture(parser, captures, constant_id, value_loc); @@ -17111,7 +16752,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca (uint32_t) (depth == -1 ? 0 : depth) ); - return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target); + return UP(pm_implicit_node_create(parser, UP(target))); } /** @@ -17120,7 +16761,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca */ static void parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) { - if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) { + if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE); } } @@ -17139,25 +16780,31 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node case PM_NO_KEYWORDS_PARAMETER_NODE: rest = first_node; break; + case PM_INTERPOLATED_SYMBOL_NODE: case PM_SYMBOL_NODE: { - if (pm_symbol_node_label_p(first_node)) { - parse_pattern_hash_key(parser, &keys, first_node); + if (pm_symbol_node_label_p(parser, first_node)) { + if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) { + pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED); + } else { + parse_pattern_hash_key(parser, &keys, first_node); + } + pm_node_t *value; if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) { - // Otherwise, we will create an implicit local variable - // target for the value. - value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node); + if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) { + value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node); + } else { + value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0)); + } } else { // Here we have a value for the first assoc in the list, so // we will parse it now. value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1)); } - pm_token_t operator = not_provided(parser); - pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value); - - pm_node_list_append(&assocs, assoc); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value)); + pm_node_list_append(parser->arena, &assocs, assoc); break; } } @@ -17169,11 +16816,10 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL; pm_parser_err_node(parser, first_node, diag_id); - pm_token_t operator = not_provided(parser); - pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end); - pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value); + pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node))); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value)); - pm_node_list_append(&assocs, assoc); + pm_node_list_append(parser->arena, &assocs, assoc); break; } } @@ -17197,7 +16843,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node rest = assoc; } else { pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST); - pm_node_list_append(&assocs, assoc); + pm_node_list_append(parser->arena, &assocs, assoc); } } else { pm_node_t *key; @@ -17207,36 +16853,43 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) { pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED); - } else if (!pm_symbol_node_label_p(key)) { + } else if (!pm_symbol_node_label_p(parser, key)) { pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA); } + } else if (accept1(parser, PM_TOKEN_LABEL)) { + key = UP(pm_symbol_node_label_create(parser, &parser->previous)); } else { expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA); - key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); + + pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end }; + key = UP(pm_symbol_node_create(parser, NULL, &label, NULL)); } parse_pattern_hash_key(parser, &keys, key); pm_node_t *value = NULL; if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key); + if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) { + value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key); + } else { + value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0)); + } } else { value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1)); } - pm_token_t operator = not_provided(parser); - pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value)); if (rest != NULL) { pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST); } - pm_node_list_append(&assocs, assoc); + pm_node_list_append(parser->arena, &assocs, assoc); } } pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest); - xfree(assocs.nodes); + // assocs.nodes is arena-allocated; no explicit free needed. pm_static_literals_free(&keys); return node; @@ -17258,13 +16911,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); - return (pm_node_t *) pm_local_variable_target_node_create( + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); + return UP(pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) - ); + )); } case PM_TOKEN_BRACKET_LEFT_ARRAY: { pm_token_t opening = parser->current; @@ -17273,7 +16926,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { // If we have an empty array pattern, then we'll just return a new // array pattern node. - return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous); + return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous)); } // Otherwise, we'll parse the inner pattern, then deal with it depending @@ -17281,34 +16934,34 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET); + expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening); pm_token_t closing = parser->previous; switch (PM_NODE_TYPE(inner)) { case PM_ARRAY_PATTERN_NODE: { pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner; - if (pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = opening.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); - return (pm_node_t *) pattern_node; + return UP(pattern_node); } break; } case PM_FIND_PATTERN_NODE: { pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner; - if (pattern_node->opening_loc.start == NULL) { - pattern_node->base.location.start = opening.start; - pattern_node->base.location.end = closing.end; + if (pattern_node->opening_loc.length == 0) { + PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing); - pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + pattern_node->opening_loc = TOK2LOC(parser, &opening); + pattern_node->closing_loc = TOK2LOC(parser, &closing); - return (pm_node_t *) pattern_node; + return UP(pattern_node); } break; @@ -17318,8 +16971,8 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing); - pm_array_pattern_node_requireds_append(node, inner); - return (pm_node_t *) node; + pm_array_pattern_node_requireds_append(parser->arena, node, inner); + return UP(node); } case PM_TOKEN_BRACE_LEFT: { bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; @@ -17339,19 +16992,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm switch (parser->current.type) { case PM_TOKEN_LABEL: parser_lex(parser); - first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); + first_node = UP(pm_symbol_node_label_create(parser, &parser->previous)); break; case PM_TOKEN_USTAR_STAR: first_node = parse_pattern_keyword_rest(parser, captures); break; case PM_TOKEN_STRING_BEGIN: - first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1)); + first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1)); break; default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type)); parser_lex(parser); - first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); + first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); break; } } @@ -17359,18 +17012,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening); pm_token_t closing = parser->previous; - node->base.location.start = opening.start; - node->base.location.end = closing.end; + PM_NODE_START_SET_TOKEN(parser, node, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing); - node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing); + node->opening_loc = TOK2LOC(parser, &opening); + node->closing_loc = TOK2LOC(parser, &closing); } parser->pattern_matching_newlines = previous_pattern_matching_newlines; - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_UDOT_DOT: case PM_TOKEN_UDOT_DOT_DOT: { @@ -17381,21 +17034,27 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // expression as the right side of the range. switch (parser->current.type) { case PM_CASE_PRIMITIVE: { - pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right); + pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1)); + return UP(pm_range_node_create(parser, NULL, &operator, right)); } default: { pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE); - pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end); - return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right); + pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator))); + return UP(pm_range_node_create(parser, NULL, &operator, right)); } } } case PM_CASE_PRIMITIVE: { - pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1)); + pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1)); // If we found a label, we need to immediately return to the caller. - if (pm_symbol_node_label_p(node)) return node; + if (pm_symbol_node_label_p(parser, node)) return node; + + // Call nodes (arithmetic operations) are not allowed in patterns + if (PM_NODE_TYPE(node) == PM_CALL_NODE) { + pm_parser_err_node(parser, node, diag_id); + return UP(pm_error_recovery_node_create_unexpected(parser, node)); + } // Now that we have a primitive, we need to check if it's part of a range. if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) { @@ -17406,11 +17065,11 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // node. Otherwise, we'll create an endless range. switch (parser->current.type) { case PM_CASE_PRIMITIVE: { - pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_range_node_create(parser, node, &operator, right); + pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1)); + return UP(pm_range_node_create(parser, node, &operator, right)); } default: - return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL); + return UP(pm_range_node_create(parser, node, &operator, NULL)); } } @@ -17425,44 +17084,44 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm switch (parser->current.type) { case PM_TOKEN_IDENTIFIER: { parser_lex(parser); - pm_node_t *variable = (pm_node_t *) parse_variable(parser); + pm_node_t *variable = UP(parse_variable(parser)); if (variable == NULL) { - PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE); - variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0); + PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE); + variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0)); } - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } case PM_TOKEN_INSTANCE_VARIABLE: { parser_lex(parser); - pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous); + pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous)); - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } case PM_TOKEN_CLASS_VARIABLE: { parser_lex(parser); - pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous); + pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous)); - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } case PM_TOKEN_GLOBAL_VARIABLE: { parser_lex(parser); - pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous); + pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous)); - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } case PM_TOKEN_NUMBERED_REFERENCE: { parser_lex(parser); - pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous); + pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous)); - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } case PM_TOKEN_BACK_REFERENCE: { parser_lex(parser); - pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous); + pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous)); - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } case PM_TOKEN_PARENTHESIS_LEFT: { bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; @@ -17471,19 +17130,19 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_token_t lparen = parser->current; parser_lex(parser); - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1)); + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1)); parser->pattern_matching_newlines = previous_pattern_matching_newlines; accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); - return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous); + expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen); + return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous)); } default: { // If we get here, then we have a pin operator followed by something // not understood. We'll create a missing node and return that. pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN); - pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end); - return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable); + pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator))); + return UP(pm_pinned_variable_node_create(parser, &operator, variable)); } } } @@ -17494,31 +17153,56 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous); - return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1)); + return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1)); } case PM_TOKEN_CONSTANT: { pm_token_t constant = parser->current; parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant); + pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant)); return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1)); } default: pm_parser_err_current(parser, diag_id); - return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); + } +} + +static bool +parse_pattern_alternation_error_each(const pm_node_t *node, void *data) { + switch (PM_NODE_TYPE(node)) { + case PM_LOCAL_VARIABLE_TARGET_NODE: { + pm_parser_t *parser = (pm_parser_t *) data; + pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE); + return false; + } + default: + return true; } } /** + * When we get here, we know that we already have a syntax error, because we + * know we have captured a variable and that we are in an alternation. + */ +static void +parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) { + pm_visit_node(node, parse_pattern_alternation_error_each, parser); +} + +/** * Parse any number of primitives joined by alternation and ended optionally by * assignment. */ static pm_node_t * parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) { pm_node_t *node = first_node; + bool alternation = false; - while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) { - pm_token_t operator = parser->previous; + while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) { + if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) { + parse_pattern_alternation_error(parser, node); + } switch (parser->current.type) { case PM_TOKEN_IDENTIFIER: @@ -17530,41 +17214,47 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p case PM_TOKEN_UDOT_DOT: case PM_TOKEN_UDOT_DOT_DOT: case PM_CASE_PRIMITIVE: { - if (node == NULL) { + if (!alternation) { node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1)); } else { + pm_token_t operator = parser->previous; pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1)); - node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator); + + if (captures->size) parse_pattern_alternation_error(parser, right); + node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator)); } break; } case PM_TOKEN_PARENTHESIS_LEFT: case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: { + pm_token_t operator = parser->previous; pm_token_t opening = parser->current; parser_lex(parser); pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); - pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous); + expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening); + pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0)); - if (node == NULL) { + if (!alternation) { node = right; } else { - node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator); + if (captures->size) parse_pattern_alternation_error(parser, right); + node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator)); } break; } default: { pm_parser_err_current(parser, diag_id); - pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); - if (node == NULL) { + if (!alternation) { node = right; } else { - node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator); + if (captures->size) parse_pattern_alternation_error(parser, right); + node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous)); } break; @@ -17585,15 +17275,15 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous)); pm_local_variable_target_node_t *target = pm_local_variable_target_node_create( parser, - &PM_LOCATION_TOKEN_VALUE(&parser->previous), + &TOK2LOC(parser, &parser->previous), constant_id, (uint32_t) (depth == -1 ? 0 : depth) ); - node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator); + node = UP(pm_capture_pattern_node_create(parser, node, target, &operator)); } return node; @@ -17612,8 +17302,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag switch (parser->current.type) { case PM_TOKEN_LABEL: { parser_lex(parser); - pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); - node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)); + pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous)); + node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1))); if (!(flags & PM_PARSE_PATTERN_TOP)) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT); @@ -17623,7 +17313,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag } case PM_TOKEN_USTAR_STAR: { node = parse_pattern_keyword_rest(parser, captures); - node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)); + node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); if (!(flags & PM_PARSE_PATTERN_TOP)) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT); @@ -17636,8 +17326,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // be dynamic symbols leading to hash patterns. node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1)); - if (pm_symbol_node_label_p(node)) { - node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)); + if (pm_symbol_node_label_p(parser, node)) { + node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); if (!(flags & PM_PARSE_PATTERN_TOP)) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT); @@ -17652,7 +17342,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag case PM_TOKEN_USTAR: { if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) { parser_lex(parser); - node = (pm_node_t *) parse_pattern_rest(parser, captures); + node = UP(parse_pattern_rest(parser, captures)); leading_rest = true; break; } @@ -17665,8 +17355,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // If we got a dynamic label symbol, then we need to treat it like the // beginning of a hash pattern. - if (pm_symbol_node_label_p(node)) { - return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)); + if (pm_symbol_node_label_p(parser, node)) { + return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1))); } if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) { @@ -17674,20 +17364,20 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // or a find pattern. We need to parse all of the patterns, put them // into a big list, and then determine which type of node we have. pm_node_list_t nodes = { 0 }; - pm_node_list_append(&nodes, node); + pm_node_list_append(parser->arena, &nodes, node); // Gather up all of the patterns into the list. while (accept1(parser, PM_TOKEN_COMMA)) { // Break early here in case we have a trailing comma. - if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) { - node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous); - pm_node_list_append(&nodes, node); + if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) { + node = UP(pm_implicit_rest_node_create(parser, &parser->previous)); + pm_node_list_append(parser->arena, &nodes, node); trailing_rest = true; break; } if (accept1(parser, PM_TOKEN_USTAR)) { - node = (pm_node_t *) parse_pattern_rest(parser, captures); + node = UP(parse_pattern_rest(parser, captures)); // If we have already parsed a splat pattern, then this is an // error. We will continue to parse the rest of the patterns, @@ -17701,7 +17391,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); } - pm_node_list_append(&nodes, node); + pm_node_list_append(parser->arena, &nodes, node); } // If the first pattern and the last pattern are rest patterns, then we @@ -17709,24 +17399,24 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // are in between because we know we already added the appropriate // errors. Otherwise we will create an array pattern. if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) { - node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes); + node = UP(pm_find_pattern_node_create(parser, &nodes)); if (nodes.size == 2) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER); } } else { - node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes); + node = UP(pm_array_pattern_node_node_list_create(parser, &nodes)); if (leading_rest && trailing_rest) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS); } } - xfree(nodes.nodes); + // nodes.nodes is arena-allocated; no explicit free needed. } else if (leading_rest) { // Otherwise, if we parsed a single splat pattern, then we know we have // an array pattern, so we can go ahead and create that node. - node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node); + node = UP(pm_array_pattern_node_rest_create(parser, node)); } return node; @@ -17737,29 +17427,33 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag * from its start bounds. If it's a compound node, then we will recursively * apply this function to its value. */ -static inline void +static PRISM_INLINE void parse_negative_numeric(pm_node_t *node) { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: { pm_integer_node_t *cast = (pm_integer_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->value.negative = true; break; } case PM_FLOAT_NODE: { pm_float_node_t *cast = (pm_float_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->value = -cast->value; break; } case PM_RATIONAL_NODE: { pm_rational_node_t *cast = (pm_rational_node_t *) node; cast->base.location.start--; + cast->base.location.length++; cast->numerator.negative = true; break; } case PM_IMAGINARY_NODE: node->location.start--; + node->location.length++; parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric); break; default: @@ -17777,22 +17471,22 @@ static void pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) { switch (diag_id) { case PM_ERR_HASH_KEY: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type)); break; } case PM_ERR_HASH_VALUE: case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type)); break; } case PM_ERR_UNARY_RECEIVER: { - const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type)); - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]); + const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]); break; } case PM_ERR_UNARY_DISALLOWED: case PM_ERR_EXPECT_ARGUMENT: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type)); break; } default: @@ -17872,6 +17566,7 @@ parse_retry(pm_parser_t *parser, const pm_node_t *node) { case PM_CONTEXT_BEGIN: case PM_CONTEXT_BLOCK_BRACES: case PM_CONTEXT_BLOCK_KEYWORDS: + case PM_CONTEXT_BLOCK_PARAMETERS: case PM_CONTEXT_CASE_IN: case PM_CONTEXT_CASE_WHEN: case PM_CONTEXT_DEFAULT_PARAMS: @@ -17952,6 +17647,7 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) { case PM_CONTEXT_BLOCK_KEYWORDS: case PM_CONTEXT_BLOCK_ELSE: case PM_CONTEXT_BLOCK_ENSURE: + case PM_CONTEXT_BLOCK_PARAMETERS: case PM_CONTEXT_BLOCK_RESCUE: case PM_CONTEXT_CASE_IN: case PM_CONTEXT_CASE_WHEN: @@ -17988,67 +17684,1383 @@ parse_yield(pm_parser_t *parser, const pm_node_t *node) { } /** - * This struct is used to pass information between the regular expression parser - * and the error callback. + * Determine if a given call node looks like a "command", which means it has + * arguments but does not have parentheses. */ -typedef struct { - /** The parser that we are parsing the regular expression for. */ - pm_parser_t *parser; +static PRISM_INLINE bool +pm_call_node_command_p(const pm_call_node_t *node) { + return ( + (node->opening_loc.length == 0) && + (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) && + (node->arguments != NULL || node->block != NULL) + ); +} - /** The start of the regular expression. */ - const uint8_t *start; +/** + * Returns true if the given node is a command-style call (a method call without + * parentheses that has arguments), excluding operator calls (e.g., a + b) which + * satisfy the same structural criteria but are not commands. + */ +static bool +pm_command_call_value_p(const pm_node_t *node) { + switch (PM_NODE_TYPE(node)) { + case PM_CALL_NODE: { + const pm_call_node_t *call = (const pm_call_node_t *) node; - /** The end of the regular expression. */ - const uint8_t *end; + // Command-style calls (e.g., foo bar, obj.foo bar). + // Attribute writes (e.g., a.b = 1) are not commands. + if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) { + return true; + } - /** - * Whether or not the source of the regular expression is shared. This - * impacts the location of error messages, because if it is shared then we - * can use the location directly and if it is not, then we use the bounds of - * the regular expression itself. - */ - bool shared; -} parse_regular_expression_error_data_t; + // A `!` or `not` prefix wrapping a command call (e.g., + // `!foo bar`, `not foo bar`) is also a command-call value. + if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) { + return pm_command_call_value_p(call->receiver); + } + + return false; + } + case PM_SUPER_NODE: { + const pm_super_node_t *cast = (const pm_super_node_t *) node; + return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL); + } + case PM_YIELD_NODE: { + const pm_yield_node_t *cast = (const pm_yield_node_t *) node; + return cast->lparen_loc.length == 0 && cast->arguments != NULL; + } + case PM_RESCUE_MODIFIER_NODE: + return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression); + case PM_DEF_NODE: { + const pm_def_node_t *cast = (const pm_def_node_t *) node; + if (cast->equal_loc.length > 0 && cast->body != NULL) { + const pm_node_t *body = cast->body; + if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) { + body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1]; + } + return pm_command_call_value_p(body); + } + return false; + } + default: + return false; + } +} /** - * This callback is called when the regular expression parser encounters a - * syntax error. + * Returns true if the given node is a block call: a command + * with a do-block, or any call chained (via `.`, `::`, `&.`) from such a node. + * Block calls can only be followed by call chaining, composition (and/or), and + * modifier operators. */ -static void -parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) { - parse_regular_expression_error_data_t *callback_data = (parse_regular_expression_error_data_t *) data; - pm_location_t location; +static bool +pm_block_call_p(const pm_node_t *node) { + while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) { + const pm_call_node_t *call = (const pm_call_node_t *) node; + if (call->opening_loc.length > 0) return false; + + // Root: command with do-block (e.g., `foo bar do end`). + if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) { + return true; + } + + // Walk up the receiver chain (e.g., `foo bar do end.baz`). + if (call->call_operator_loc.length > 0 && call->receiver != NULL) { + node = call->receiver; + continue; + } + + return false; + } + + return false; +} + +/** + * Parse a case expression (the `case` keyword). This handles both case-when and + * case-in (pattern matching) forms. + */ +static pm_node_t * +parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) { + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); + + pm_token_t case_keyword = parser->previous; + pm_node_t *predicate = NULL; + + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + predicate = NULL; + } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) { + predicate = NULL; + } else if (!token_begins_expression_p(parser->current.type)) { + predicate = NULL; + } else { + predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1)); + while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + } + + if (match1(parser, PM_TOKEN_KEYWORD_END)) { + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); + parser_lex(parser); + pop_block_exits(parser, previous_block_exits); + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); + return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous)); + } + + /* At this point we can create a case node, though we don't yet know if it + * is a case-in or case-when node. */ + pm_node_t *node; + + if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { + pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL); + pm_static_literals_t literals = { 0 }; + + /* At this point we've seen a when keyword, so we know this is a + * case-when node. We will continue to parse the when nodes until we hit + * the end of the list. */ + while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); + parser_lex(parser); + + pm_token_t when_keyword = parser->previous; + pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword); + + do { + if (accept1(parser, PM_TOKEN_USTAR)) { + pm_token_t operator = parser->previous; + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + + pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression); + pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node)); + + if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break; + } else { + pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1)); + pm_when_node_conditions_append(parser->arena, when_node, condition); + + /* If we found a missing node, then this is a syntax error + * and we should stop looping. */ + if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break; + + /* If this is a string node, then we need to mark it as + * frozen because when clause strings are frozen. */ + if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) { + pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL); + } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) { + pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL); + } + + pm_when_clause_static_literals_add(parser, &literals, condition); + } + } while (accept1(parser, PM_TOKEN_COMMA)); + + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); + } + } else { + expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER); + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); + } + + if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1)); + if (statements != NULL) { + pm_when_node_statements_set(when_node, statements); + } + } + + pm_case_node_condition_append(parser->arena, case_node, UP(when_node)); + } + + /* If we didn't parse any conditions (in or when) then we need to + * indicate that we have an error. */ + if (case_node->conditions.size == 0) { + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); + } + + pm_static_literals_free(&literals); + node = UP(case_node); + } else { + pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate); + + /* If this is a case-match node (i.e., it is a pattern matching case + * statement) then we must have a predicate. */ + if (predicate == NULL) { + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE); + } + + /* At this point we expect that we're parsing a case-in node. We will + * continue to parse the in nodes until we hit the end of the list. */ + while (match1(parser, PM_TOKEN_KEYWORD_IN)) { + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); + + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = true; + + lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); + parser->command_start = false; + parser_lex(parser); + + pm_token_t in_keyword = parser->previous; + + pm_constant_id_list_t captures = { 0 }; + pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1)); + + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + + /* Since we're in the top-level of the case-in node we need to + * check for guard clauses in the form of `if` or `unless` + * statements. */ + if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) { + pm_token_t keyword = parser->previous; + pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); + pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate)); + } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) { + pm_token_t keyword = parser->previous; + pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); + pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate)); + } + + /* Now we need to check for the terminator of the in node's pattern. + * It can be a newline or semicolon optionally followed by a `then` + * keyword. */ + pm_token_t then_keyword = { 0 }; + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { + then_keyword = parser->previous; + } + } else { + expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER); + then_keyword = parser->previous; + } + + /* Now we can actually parse the statements associated with the in + * node. */ + pm_statements_node_t *statements; + if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + statements = NULL; + } else { + statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1)); + } + + /* Now that we have the full pattern and statements, we can create + * the node and attach it to the case node. */ + pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword))); + pm_case_match_node_condition_append(parser->arena, case_node, condition); + } + + /* If we didn't parse any conditions (in or when) then we need to + * indicate that we have an error. */ + if (case_node->conditions.size == 0) { + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); + } + + node = UP(case_node); + } + + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) { + pm_token_t else_keyword = parser->previous; + pm_else_node_t *else_node; + + if (!match1(parser, PM_TOKEN_KEYWORD_END)) { + else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current); + } else { + else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current); + } - if (callback_data->shared) { - location = (pm_location_t) { .start = start, .end = end }; + if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { + pm_case_node_else_clause_set((pm_case_node_t *) node, else_node); + } else { + pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node); + } + } + + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword); + + if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { + pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous); } else { - location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end }; + pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous); } - PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message); + pop_block_exits(parser, previous_block_exits); + return node; } /** - * Parse the errors for the regular expression and add them to the parser. + * Parse a class definition expression (the `class` keyword). This handles both + * regular class definitions and singleton class definitions (`class << expr`). */ -static void -parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) { - const pm_string_t *unescaped = &node->unescaped; - parse_regular_expression_error_data_t error_data = { - .parser = parser, - .start = node->base.location.start, - .end = node->base.location.end, - .shared = unescaped->type == PM_STRING_SHARED - }; +static pm_node_t * +parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) { + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); + + pm_token_t class_keyword = parser->previous; + pm_do_loop_stack_push(parser, false); + + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + if (accept1(parser, PM_TOKEN_LESS_LESS)) { + pm_token_t operator = parser->previous; + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1)); + + pm_parser_scope_push(parser, true); + if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type)); + } + + pm_node_t *statements = NULL; + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } + + if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); + } + + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); + + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + + pm_parser_scope_pop(parser); + pm_do_loop_stack_pop(parser); + + flush_block_exits(parser, previous_block_exits); + return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous)); + } + + pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1)); + pm_token_t name = parser->previous; + if (name.type != PM_TOKEN_CONSTANT) { + pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME); + } + + pm_token_t inheritance_operator = { 0 }; + pm_node_t *superclass; + + if (match1(parser, PM_TOKEN_LESS)) { + inheritance_operator = parser->current; + lex_state_set(parser, PM_LEX_STATE_BEG); + + parser->command_start = true; + parser_lex(parser); + + superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1)); + } else { + superclass = NULL; + } + + pm_parser_scope_push(parser, true); + + if (inheritance_operator.start != NULL) { + expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END); + } else { + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + } + pm_node_t *statements = NULL; - pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data); + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } + + if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); + } + + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); + + if (context_def_p(parser)) { + pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD); + } + + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + + pm_parser_scope_pop(parser); + pm_do_loop_stack_pop(parser); + + if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { + pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME); + if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { + constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path)); + } + } + + pop_block_exits(parser, previous_block_exits); + return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous)); +} + +/** + * Parse a method definition expression (the `def` keyword). + */ +static pm_node_t * +parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) { + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + pm_token_t def_keyword = parser->current; + size_t opening_newline_index = token_newline_index(parser); + + pm_node_t *receiver = NULL; + pm_token_t operator = { 0 }; + pm_token_t name; + + /* This context is necessary for lexing `...` in a bare params correctly. It + * must be pushed before lexing the first param, so it is here. */ + context_push(parser, PM_CONTEXT_DEF_PARAMS); + parser_lex(parser); + + /* This will be false if the method name is not a valid identifier but could + * be followed by an operator. */ + bool valid_name = true; + + switch (parser->current.type) { + case PM_CASE_OPERATOR: + pm_parser_scope_push(parser, true); + lex_state_set(parser, PM_LEX_STATE_ENDFN); + parser_lex(parser); + + name = parser->previous; + break; + case PM_TOKEN_IDENTIFIER: { + parser_lex(parser); + + if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { + receiver = parse_variable_call(parser); + + pm_parser_scope_push(parser, true); + lex_state_set(parser, PM_LEX_STATE_FNAME); + parser_lex(parser); + + operator = parser->previous; + name = parse_method_definition_name(parser); + } else { + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); + pm_parser_scope_push(parser, true); + + name = parser->previous; + } + + break; + } + case PM_TOKEN_INSTANCE_VARIABLE: + case PM_TOKEN_CLASS_VARIABLE: + case PM_TOKEN_GLOBAL_VARIABLE: + valid_name = false; + PRISM_FALLTHROUGH + case PM_TOKEN_CONSTANT: + case PM_TOKEN_KEYWORD_NIL: + case PM_TOKEN_KEYWORD_SELF: + case PM_TOKEN_KEYWORD_TRUE: + case PM_TOKEN_KEYWORD_FALSE: + case PM_TOKEN_KEYWORD___FILE__: + case PM_TOKEN_KEYWORD___LINE__: + case PM_TOKEN_KEYWORD___ENCODING__: { + pm_parser_scope_push(parser, true); + parser_lex(parser); + + pm_token_t identifier = parser->previous; + + if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { + lex_state_set(parser, PM_LEX_STATE_FNAME); + parser_lex(parser); + operator = parser->previous; + + switch (identifier.type) { + case PM_TOKEN_CONSTANT: + receiver = UP(pm_constant_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_INSTANCE_VARIABLE: + receiver = UP(pm_instance_variable_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_CLASS_VARIABLE: + receiver = UP(pm_class_variable_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_GLOBAL_VARIABLE: + receiver = UP(pm_global_variable_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_NIL: + receiver = UP(pm_nil_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_SELF: + receiver = UP(pm_self_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_TRUE: + receiver = UP(pm_true_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_FALSE: + receiver = UP(pm_false_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD___FILE__: + receiver = UP(pm_source_file_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD___LINE__: + receiver = UP(pm_source_line_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD___ENCODING__: + receiver = UP(pm_source_encoding_node_create(parser, &identifier)); + break; + default: + break; + } + + name = parse_method_definition_name(parser); + } else { + if (!valid_name) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type)); + } + + name = identifier; + } + break; + } + case PM_TOKEN_PARENTHESIS_LEFT: { + /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner + * expression of this parenthesis should not be processed under this + * context. Thus, the context is popped here. */ + context_pop(parser); + parser_lex(parser); + + pm_token_t lparen = parser->previous; + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1)); + + accept1(parser, PM_TOKEN_NEWLINE); + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + pm_token_t rparen = parser->previous; + + lex_state_set(parser, PM_LEX_STATE_FNAME); + expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM); + + operator = parser->previous; + receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0)); + + /* To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as + * described the above. */ + pm_parser_scope_push(parser, true); + context_push(parser, PM_CONTEXT_DEF_PARAMS); + name = parse_method_definition_name(parser); + break; + } + default: + pm_parser_scope_push(parser, true); + name = parse_method_definition_name(parser); + break; + } + + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; + pm_parameters_node_t *params; + + bool accept_endless_def = true; + switch (parser->current.type) { + case PM_TOKEN_PARENTHESIS_LEFT: { + parser_lex(parser); + lparen = parser->previous; + + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + params = NULL; + } else { + /* https://bugs.ruby-lang.org/issues/19107 */ + bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1; + params = parse_parameters( + parser, + PM_BINDING_POWER_DEFINED, + true, + allow_trailing_comma, + true, + true, + false, + PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES, + (uint16_t) (depth + 1) + ); + } + + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->command_start = true; + + context_pop(parser); + if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; + } + + rparen = parser->previous; + break; + } + case PM_CASE_PARAMETER: { + /* If we're about to lex a label, we need to add the label state to + * make sure the next newline is ignored. */ + if (parser->current.type == PM_TOKEN_LABEL) { + lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL); + } + + params = parse_parameters( + parser, + PM_BINDING_POWER_DEFINED, + false, + false, + true, + true, + false, + PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES, + (uint16_t) (depth + 1) + ); + + /* Reject `def * = 1` and similar. We have to specifically check for + * them because they create ambiguity with optional arguments. */ + accept_endless_def = false; + + context_pop(parser); + break; + } + default: { + params = NULL; + context_pop(parser); + break; + } + } + + pm_node_t *statements = NULL; + pm_token_t equal = { 0 }; + pm_token_t end_keyword = { 0 }; + + if (accept1(parser, PM_TOKEN_EQUAL)) { + if (token_is_setter_name(&name)) { + pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER); + } + if (!accept_endless_def) { + pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS); + } + if ( + parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS && + parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS + ) { + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); + } + equal = parser->previous; + + context_push(parser, PM_CONTEXT_DEF); + pm_do_loop_stack_push(parser, false); + statements = UP(pm_statements_node_create(parser)); + + uint8_t allow_flags; + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) { + allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL; + } else { + /* Allow `def foo = puts "Hello"` but not + * `private def foo = puts "Hello"` */ + allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0; + } + + /* Inside a def body, we push true onto the accepts_block_stack so that + * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block + * for primary-level constructs, not commands). During command argument + * parsing, the stack is pushed to false, causing `do` to be lexed as + * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless + * def body and instead left for the outer context. */ + pm_accepts_block_stack_push(parser, true); + pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1)); + pm_accepts_block_stack_pop(parser); + + /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error + * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is + * intentionally not caught here — it should bubble up to the outer + * context (e.g., `private def f = puts "Hello" do end` where the block + * attaches to `private`). */ + if (accept1(parser, PM_TOKEN_KEYWORD_DO)) { + pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1)); + pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK); + } + + if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) { + context_push(parser, PM_CONTEXT_RESCUE_MODIFIER); + + pm_token_t rescue_keyword = parser->previous; + + /* In the Ruby grammar, the rescue value of an endless method + * command excludes and/or and in/=>. */ + pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); + context_pop(parser); + + statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value)); + } + + /* A nested endless def whose body is a command call (e.g., + * `def f = def g = foo bar`) is a command assignment and cannot appear + * as a def body. */ + if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) { + PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + } + + pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); + pm_do_loop_stack_pop(parser); + context_pop(parser); + } else { + if (lparen.start == NULL) { + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->command_start = true; + expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM); + } else { + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + } + + pm_accepts_block_stack_push(parser, true); + pm_do_loop_stack_push(parser, false); + + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } + + if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false); + } + + pm_accepts_block_stack_pop(parser); + pm_do_loop_stack_pop(parser); + + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword); + end_keyword = parser->previous; + } + + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + pm_parser_scope_pop(parser); + + /* If the final character is `@` as is the case when defining methods to + * override the unary operators, we should ignore the @ in the same way we + * do for symbols. */ + pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name)); + + flush_block_exits(parser, previous_block_exits); + + return UP(pm_def_node_create( + parser, + name_id, + &name, + receiver, + params, + statements, + &locals, + &def_keyword, + NTOK2PTR(operator), + NTOK2PTR(lparen), + NTOK2PTR(rparen), + NTOK2PTR(equal), + NTOK2PTR(end_keyword) + )); +} + +/** + * Parse a module definition expression (the `module` keyword). + */ +static pm_node_t * +parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) { + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); + pm_token_t module_keyword = parser->previous; + + pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1)); + pm_token_t name; + + /* If we can recover from a syntax error that occurred while parsing the + * name of the module, then we'll handle that here. */ + if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { + pop_block_exits(parser, previous_block_exits); + + pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing)); + } + + while (accept1(parser, PM_TOKEN_COLON_COLON)) { + pm_token_t double_colon = parser->previous; + + expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous)); + } + + /* Here we retrieve the name of the module. If it wasn't a constant, then + * it's possible that `module foo` was passed, which is a syntax error. We + * handle that here as well. */ + name = parser->previous; + if (name.type != PM_TOKEN_CONSTANT) { + pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME); + } + + if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { + constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path)); + } + + pm_parser_scope_push(parser, true); + accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE); + pm_node_t *statements = NULL; + + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } + + if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false); + } + + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + + pm_parser_scope_pop(parser); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword); + + if (context_def_p(parser)) { + pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD); + } + + pop_block_exits(parser, previous_block_exits); + + return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous)); +} + +/** + * Parse an interpolated word array literal (`%W[...]`). + */ +static pm_node_t * +parse_string_array(pm_parser_t *parser, uint16_t depth) { + parser_lex(parser); + pm_token_t opening = parser->previous; + pm_array_node_t *array = pm_array_node_create(parser, &opening); + + /* This is the current node that we are parsing that will be added to the + * list of elements. */ + pm_node_t *current = NULL; + + while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { + switch (parser->current.type) { + case PM_TOKEN_WORDS_SEP: { + /* Reset the explicit encoding if we hit a separator since each + * element can have its own encoding. */ + parser->explicit_encoding = NULL; + + if (current == NULL) { + /* If we hit a separator before we have any content, then we + * don't need to do anything. */ + } else { + /* If we hit a separator after we've hit content, then we + * need to append that content to the list and reset the + * current node. */ + pm_array_node_elements_append(parser->arena, array, current); + current = NULL; + } + + parser_lex(parser); + break; + } + case PM_TOKEN_STRING_CONTENT: { + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + pm_node_flag_set(string, parse_unescaped_encoding(parser)); + parser_lex(parser); + + if (current == NULL) { + /* If we hit content and the current node is NULL, then this + * is the first string content we've seen. In that case + * we're going to create a new string node and set that to + * the current. */ + current = string; + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + /* If we hit string content and the current node is an + * interpolated string, then we need to append the string + * content to the list of child nodes. */ + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + /* If we hit string content and the current node is a string + * node, then we need to convert the current node into an + * interpolated string and add the string content to the + * list of child nodes. */ + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + pm_interpolated_string_node_append(parser, interpolated, string); + current = UP(interpolated); + } else { + assert(false && "unreachable"); + } + + break; + } + case PM_TOKEN_EMBVAR: { + if (current == NULL) { + /* If we hit an embedded variable and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + /* If we hit an embedded variable and the current node is a + * string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + current = UP(interpolated); + } else { + /* If we hit an embedded variable and the current node is an + * interpolated string, then we'll just add the embedded + * variable. */ + } + + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part); + break; + } + case PM_TOKEN_EMBEXPR_BEGIN: { + if (current == NULL) { + /* If we hit an embedded expression and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + /* If we hit an embedded expression and the current node is + * a string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + current = UP(interpolated); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + /* If we hit an embedded expression and the current node is + * an interpolated string, then we'll just continue on. */ + } else { + assert(false && "unreachable"); + } + + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part); + break; + } + default: + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT); + parser_lex(parser); + break; + } + } + + /* If we have a current node, then we need to append it to the list. */ + if (current) { + pm_array_node_elements_append(parser->arena, array, current); + } + + pm_token_t closing = parser->current; + if (match1(parser, PM_TOKEN_EOF)) { + pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM); + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + } else { + expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM); + } + + pm_array_node_close_set(parser, array, &closing); + return UP(array); +} + +/** + * Parse an interpolated symbol array literal (`%I[...]`). + */ +static pm_node_t * +parse_symbol_array(pm_parser_t *parser, uint16_t depth) { + parser_lex(parser); + pm_token_t opening = parser->previous; + pm_array_node_t *array = pm_array_node_create(parser, &opening); + + /* This is the current node that we are parsing that will be added to the + * list of elements. */ + pm_node_t *current = NULL; + + while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { + switch (parser->current.type) { + case PM_TOKEN_WORDS_SEP: { + if (current == NULL) { + /* If we hit a separator before we have any content, then we + * don't need to do anything. */ + } else { + /* If we hit a separator after we've hit content, then we + * need to append that content to the list and reset the + * current node. */ + pm_array_node_elements_append(parser->arena, array, current); + current = NULL; + } + + parser_lex(parser); + break; + } + case PM_TOKEN_STRING_CONTENT: { + if (current == NULL) { + /* If we hit content and the current node is NULL, then this + * is the first string content we've seen. In that case + * we're going to create a new string node and set that to + * the current. */ + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); + parser_lex(parser); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { + /* If we hit string content and the current node is an + * interpolated string, then we need to append the string + * content to the list of child nodes. */ + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + parser_lex(parser); + + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + /* If we hit string content and the current node is a symbol + * node, then we need to convert the current node into an + * interpolated string and add the string content to the + * list of child nodes. */ + pm_symbol_node_t *cast = (pm_symbol_node_t *) current; + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->start + cast->value_loc.start, + .end = parser->start + cast->value_loc.start + cast->value_loc.length + }; + + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); + parser_lex(parser); + + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string); + pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string); + + current = UP(interpolated); + } else { + assert(false && "unreachable"); + } + + break; + } + case PM_TOKEN_EMBVAR: { + bool start_location_set = false; + if (current == NULL) { + /* If we hit an embedded variable and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + /* If we hit an embedded variable and the current node is a + * string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + + current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); + pm_interpolated_symbol_node_append(parser->arena, interpolated, current); + PM_NODE_START_SET_NODE(interpolated, current); + start_location_set = true; + current = UP(interpolated); + } else { + /* If we hit an embedded variable and the current node is an + * interpolated string, then we'll just add the embedded + * variable. */ + } + + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part); + if (!start_location_set) { + PM_NODE_START_SET_NODE(current, part); + } + break; + } + case PM_TOKEN_EMBEXPR_BEGIN: { + bool start_location_set = false; + if (current == NULL) { + /* If we hit an embedded expression and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + /* If we hit an embedded expression and the current node is + * a string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + + current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); + pm_interpolated_symbol_node_append(parser->arena, interpolated, current); + PM_NODE_START_SET_NODE(interpolated, current); + start_location_set = true; + current = UP(interpolated); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { + /* If we hit an embedded expression and the current node is + * an interpolated string, then we'll just continue on. */ + } else { + assert(false && "unreachable"); + } + + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part); + if (!start_location_set) { + PM_NODE_START_SET_NODE(current, part); + } + break; + } + default: + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT); + parser_lex(parser); + break; + } + } + + /* If we have a current node, then we need to append it to the list. */ + if (current) { + pm_array_node_elements_append(parser->arena, array, current); + } + + pm_token_t closing = parser->current; + if (match1(parser, PM_TOKEN_EOF)) { + pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM); + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + } else { + expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM); + } + pm_array_node_close_set(parser, array, &closing); + + return UP(array); +} + +/** + * Parse a parenthesized expression, which could be a grouping, a multi-target + * assignment, or a set of statements. + */ +static pm_node_t * +parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) { + pm_token_t opening = parser->current; + pm_node_flags_t paren_flags = 0; + + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + parser_lex(parser); + while (true) { + if (accept1(parser, PM_TOKEN_SEMICOLON)) { + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + } else if (!accept1(parser, PM_TOKEN_NEWLINE)) { + break; + } + } + + /* If this is the end of the file or we match a right parenthesis, then we + * have an empty parentheses node, and we can immediately return. */ + if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) { + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + pop_block_exits(parser, previous_block_exits); + return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags)); + } + + /* Otherwise, we're going to parse the first statement in the list of + * statements within the parentheses. */ + pm_accepts_block_stack_push(parser, true); + context_push(parser, PM_CONTEXT_PARENS); + pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); + context_pop(parser); + + /* Determine if this statement is followed by a terminator. In the case of a + * single statement, this is fine. But in the case of multiple statements + * it's required. */ + bool terminator_found = false; + + if (accept1(parser, PM_TOKEN_SEMICOLON)) { + terminator_found = true; + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + } else if (accept1(parser, PM_TOKEN_NEWLINE)) { + terminator_found = true; + } + + if (terminator_found) { + while (true) { + if (accept1(parser, PM_TOKEN_SEMICOLON)) { + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + } else if (!accept1(parser, PM_TOKEN_NEWLINE)) { + break; + } + } + } + + /* If we hit a right parenthesis, then we're done parsing the parentheses + * node, and we can check which kind of node we should return. */ + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) { + lex_state_set(parser, PM_LEX_STATE_ENDARG); + } + + parser_lex(parser); + pm_accepts_block_stack_pop(parser); + pop_block_exits(parser, previous_block_exits); + + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { + /* If we have a single statement and are ending on a right + * parenthesis, then we need to check if this is possibly a multiple + * target node. */ + pm_multi_target_node_t *multi_target; + + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) { + multi_target = (pm_multi_target_node_t *) statement; + } else { + multi_target = pm_multi_target_node_create(parser); + pm_multi_target_node_targets_append(parser, multi_target, statement); + } + + multi_target->lparen_loc = TOK2LOC(parser, &opening); + multi_target->rparen_loc = TOK2LOC(parser, &parser->previous); + PM_NODE_START_SET_TOKEN(parser, multi_target, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous); + + pm_node_t *result; + if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) { + result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + accept1(parser, PM_TOKEN_NEWLINE); + } else { + result = UP(multi_target); + } + + if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) { + /* All set, this is explicitly allowed by the parent context. */ + } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) { + /* All set, we're inside a for loop and we're parsing multiple + * targets. */ + } else if (binding_power != PM_BINDING_POWER_STATEMENT) { + /* Multi targets are not allowed when it's not a statement + * level. */ + pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); + } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) { + /* Multi targets must be followed by an equal sign in order to + * be valid (or a right parenthesis if they are nested). */ + pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); + } + + return result; + } + + /* If we have a single statement and are ending on a right parenthesis + * and we didn't return a multiple assignment node, then we can return a + * regular parentheses node now. */ + pm_statements_node_t *statements = pm_statements_node_create(parser); + pm_statements_node_body_append(parser, statements, statement, true); + + return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags)); + } + + /* If we have more than one statement in the set of parentheses, then we are + * going to parse all of them as a list of statements. We'll do that here. + */ + context_push(parser, PM_CONTEXT_PARENS); + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + + pm_statements_node_t *statements = pm_statements_node_create(parser); + pm_statements_node_body_append(parser, statements, statement, true); + + /* If we didn't find a terminator and we didn't find a right parenthesis, + * then this is a syntax error. */ + if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + } + + /* Parse each statement within the parentheses. */ + while (true) { + pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); + pm_statements_node_body_append(parser, statements, node, true); + + /* If we're recovering from a syntax error, then we need to stop parsing + * the statements now. */ + if (parser->recovering) { + /* If this is the level of context where the recovery has happened, + * then we can mark the parser as done recovering. */ + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false; + break; + } + + /* If we couldn't parse an expression at all, then we need to bail out + * of the loop. */ + if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break; + + /* If we successfully parsed a statement, then we are going to need a + * terminator to delimit them. */ + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break; + } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + break; + } else if (!match1(parser, PM_TOKEN_EOF)) { + /* If we're at the end of the file, then we're going to add an error + * after this for the ) anyway. */ + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + } + } + + context_pop(parser); + pm_accepts_block_stack_pop(parser); + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + + /* When we're parsing multi targets, we allow them to be followed by a right + * parenthesis if they are at the statement level. This is only possible if + * they are the final statement in a parentheses. We need to explicitly + * reject that here. */ + { + pm_node_t *statement = statements->body.nodes[statements->body.size - 1]; + + if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { + pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser); + pm_multi_target_node_targets_append(parser, multi_target, statement); + + statement = UP(multi_target); + statements->body.nodes[statements->body.size - 1] = statement; + } + + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) { + const uint8_t *offset = parser->start + PM_NODE_END(statement); + pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset }; + pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0)); + + statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value)); + statements->body.nodes[statements->body.size - 1] = statement; + + pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED); + } + } + + pop_block_exits(parser, previous_block_exits); + pm_void_statements_check(parser, statements, true); + return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags)); } /** * Parse an expression that begins with the previous node that we just lexed. */ -static inline pm_node_t * -parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) { +static PRISM_INLINE pm_node_t * +parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { switch (parser->current.type) { case PM_TOKEN_BRACKET_LEFT_ARRAY: { parser_lex(parser); @@ -18077,11 +19089,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } else { // If there was no comma, then we need to add a syntax // error. - const uint8_t *location = parser->previous.end; - PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type)); - - parser->previous.start = location; - parser->previous.type = PM_TOKEN_MISSING; + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; } } @@ -18099,28 +19109,28 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) { pm_parser_scope_forwarding_positionals_check(parser, &operator); } else { - expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); } - element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression); + element = UP(pm_splat_node_create(parser, &operator, expression)); } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) { if (parsed_bare_hash) { pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH); } - element = (pm_node_t *) pm_keyword_hash_node_create(parser); + element = UP(pm_keyword_hash_node_create(parser)); pm_static_literals_t hash_keys = { 0 }; - if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) { + if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) { parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1)); } pm_static_literals_free(&hash_keys); parsed_bare_hash = true; } else { - element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1)); + element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1)); - if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { + if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { if (parsed_bare_hash) { pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH); } @@ -18129,18 +19139,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_static_literals_t hash_keys = { 0 }; pm_hash_key_static_literals_add(parser, &hash_keys, element); - pm_token_t operator; + pm_token_t operator = { 0 }; if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { operator = parser->previous; - } else { - operator = not_provided(parser); } - pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value); - pm_keyword_hash_node_elements_append(hash, assoc); + pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value)); + pm_keyword_hash_node_elements_append(parser->arena, hash, assoc); - element = (pm_node_t *) hash; + element = UP(hash); if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) { parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1)); } @@ -18150,213 +19158,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } } - pm_array_node_elements_append(array, element); - if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break; + pm_array_node_elements_append(parser->arena, array, element); + if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break; } accept1(parser, PM_TOKEN_NEWLINE); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type)); parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; + parser->previous.type = 0; } - pm_array_node_close_set(array, &parser->previous); + pm_array_node_close_set(parser, array, &parser->previous); pm_accepts_block_stack_pop(parser); - return (pm_node_t *) array; + return UP(array); } case PM_TOKEN_PARENTHESIS_LEFT: - case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: { - pm_token_t opening = parser->current; - - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - parser_lex(parser); - while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)); - - // If this is the end of the file or we match a right parenthesis, then - // we have an empty parentheses node, and we can immediately return. - if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) { - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous); - } - - // Otherwise, we're going to parse the first statement in the list - // of statements within the parentheses. - pm_accepts_block_stack_push(parser, true); - context_push(parser, PM_CONTEXT_PARENS); - pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); - context_pop(parser); - - // Determine if this statement is followed by a terminator. In the - // case of a single statement, this is fine. But in the case of - // multiple statements it's required. - bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - if (terminator_found) { - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); - } - - // If we hit a right parenthesis, then we're done parsing the - // parentheses node, and we can check which kind of node we should - // return. - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) { - lex_state_set(parser, PM_LEX_STATE_ENDARG); - } - - parser_lex(parser); - pm_accepts_block_stack_pop(parser); - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { - // If we have a single statement and are ending on a right - // parenthesis, then we need to check if this is possibly a - // multiple target node. - pm_multi_target_node_t *multi_target; - - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) { - multi_target = (pm_multi_target_node_t *) statement; - } else { - multi_target = pm_multi_target_node_create(parser); - pm_multi_target_node_targets_append(parser, multi_target, statement); - } - - pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening); - pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); - - multi_target->lparen_loc = lparen_loc; - multi_target->rparen_loc = rparen_loc; - multi_target->base.location.start = lparen_loc.start; - multi_target->base.location.end = rparen_loc.end; - - pm_node_t *result; - if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) { - result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - accept1(parser, PM_TOKEN_NEWLINE); - } else { - result = (pm_node_t *) multi_target; - } - - if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) { - // All set, this is explicitly allowed by the parent - // context. - } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) { - // All set, we're inside a for loop and we're parsing - // multiple targets. - } else if (binding_power != PM_BINDING_POWER_STATEMENT) { - // Multi targets are not allowed when it's not a - // statement level. - pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); - } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) { - // Multi targets must be followed by an equal sign in - // order to be valid (or a right parenthesis if they are - // nested). - pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); - } - - return result; - } - - // If we have a single statement and are ending on a right parenthesis - // and we didn't return a multiple assignment node, then we can return a - // regular parentheses node now. - pm_statements_node_t *statements = pm_statements_node_create(parser); - pm_statements_node_body_append(parser, statements, statement, true); - - return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous); - } - - // If we have more than one statement in the set of parentheses, - // then we are going to parse all of them as a list of statements. - // We'll do that here. - context_push(parser, PM_CONTEXT_PARENS); - pm_statements_node_t *statements = pm_statements_node_create(parser); - pm_statements_node_body_append(parser, statements, statement, true); - - // If we didn't find a terminator and we didn't find a right - // parenthesis, then this is a syntax error. - if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); - } - - // Parse each statement within the parentheses. - while (true) { - pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); - pm_statements_node_body_append(parser, statements, node, true); - - // If we're recovering from a syntax error, then we need to stop - // parsing the statements now. - if (parser->recovering) { - // If this is the level of context where the recovery has - // happened, then we can mark the parser as done recovering. - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false; - break; - } - - // If we couldn't parse an expression at all, then we need to - // bail out of the loop. - if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break; - - // If we successfully parsed a statement, then we are going to - // need terminator to delimit them. - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break; - } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - break; - } else if (!match1(parser, PM_TOKEN_EOF)) { - // If we're at the end of the file, then we're going to add - // an error after this for the ) anyway. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type)); - } - } - - context_pop(parser); - pm_accepts_block_stack_pop(parser); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - - // When we're parsing multi targets, we allow them to be followed by - // a right parenthesis if they are at the statement level. This is - // only possible if they are the final statement in a parentheses. - // We need to explicitly reject that here. - { - pm_node_t *statement = statements->body.nodes[statements->body.size - 1]; - - if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { - pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser); - pm_multi_target_node_targets_append(parser, multi_target, statement); - - statement = (pm_node_t *) multi_target; - statements->body.nodes[statements->body.size - 1] = statement; - } - - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) { - const uint8_t *offset = statement->location.end; - pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset }; - pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset); - - statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value); - statements->body.nodes[statements->body.size - 1] = statement; - - pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED); - } - } - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - pm_void_statements_check(parser, statements, true); - return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous); - } + case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: + return parse_parentheses(parser, binding_power, depth); case PM_TOKEN_BRACE_LEFT: { // If we were passed a current_hash_keys via the parser, then that // means we're already parsing a hash and we want to share the set @@ -18371,14 +19192,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_accepts_block_stack_push(parser, true); parser_lex(parser); - pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous); + pm_token_t opening = parser->previous; + pm_hash_node_t *node = pm_hash_node_create(parser, &opening); if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) { if (current_hash_keys != NULL) { - parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1)); + parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1)); } else { pm_static_literals_t hash_keys = { 0 }; - parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1)); + parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1)); pm_static_literals_free(&hash_keys); } @@ -18386,26 +19208,33 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } pm_accepts_block_stack_pop(parser); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM); - pm_hash_node_closing_loc_set(node, &parser->previous); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening); + pm_hash_node_closing_loc_set(parser, node, &parser->previous); - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_CHARACTER_LITERAL: { - parser_lex(parser); - - pm_token_t opening = parser->previous; - opening.type = PM_TOKEN_STRING_BEGIN; - opening.end = opening.start + 1; - - pm_token_t content = parser->previous; - content.type = PM_TOKEN_STRING_CONTENT; - content.start = content.start + 1; + pm_node_t *node = UP(pm_string_node_create_current_string( + parser, + &(pm_token_t) { + .type = PM_TOKEN_STRING_BEGIN, + .start = parser->current.start, + .end = parser->current.start + 1 + }, + &(pm_token_t) { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->current.start + 1, + .end = parser->current.end + }, + NULL + )); - pm_token_t closing = not_provided(parser); - pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing); pm_node_flag_set(node, parse_unescaped_encoding(parser)); + // Skip past the character literal here, since now we have handled + // parser->explicit_encoding correctly. + parser_lex(parser); + // Characters can be followed by strings in which case they are // automatically concatenated. if (match1(parser, PM_TOKEN_STRING_BEGIN)) { @@ -18416,7 +19245,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } case PM_TOKEN_CLASS_VARIABLE: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous)); if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -18432,16 +19261,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // fact a method call, not a constant read. if ( match1(parser, PM_TOKEN_PARENTHESIS_LEFT) || - (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || + ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) || match1(parser, PM_TOKEN_BRACE_LEFT) ) { pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments); + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); + return UP(pm_call_node_fcall_create(parser, &constant, &arguments)); } - pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous)); if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { // If we get here, then we have a comma immediately following a @@ -18456,7 +19285,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t delimiter = parser->previous; expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); - pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous); + pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous)); if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -18469,7 +19298,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t operator = parser->current; parser_lex(parser); - pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); // Unary .. and ... are special because these are non-associative // operators that can also be unary operators. In this case we need @@ -18479,23 +19308,23 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR); } - return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right); + return UP(pm_range_node_create(parser, NULL, &operator, right)); } case PM_TOKEN_FLOAT: parser_lex(parser); - return (pm_node_t *) pm_float_node_create(parser, &parser->previous); + return UP(pm_float_node_create(parser, &parser->previous)); case PM_TOKEN_FLOAT_IMAGINARY: parser_lex(parser); - return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous); + return UP(pm_float_node_imaginary_create(parser, &parser->previous)); case PM_TOKEN_FLOAT_RATIONAL: parser_lex(parser); - return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous); + return UP(pm_float_node_rational_create(parser, &parser->previous)); case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: parser_lex(parser); - return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous); + return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous)); case PM_TOKEN_NUMBERED_REFERENCE: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous)); if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -18505,7 +19334,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } case PM_TOKEN_GLOBAL_VARIABLE: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous)); if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -18515,7 +19344,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } case PM_TOKEN_BACK_REFERENCE: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous)); if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -18537,26 +19366,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_call_node_t *call = (pm_call_node_t *) node; pm_arguments_t arguments = { 0 }; - if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) { + if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) { // Since we found arguments, we need to turn off the // variable call bit in the flags. - pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL); + pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL); call->opening_loc = arguments.opening_loc; call->arguments = arguments.arguments; call->closing_loc = arguments.closing_loc; call->block = arguments.block; - if (arguments.block != NULL) { - call->base.location.end = arguments.block->location.end; - } else if (arguments.closing_loc.start == NULL) { - if (arguments.arguments != NULL) { - call->base.location.end = arguments.arguments->base.location.end; - } else { - call->base.location.end = call->message_loc.end; - } + const pm_location_t *end = pm_arguments_end(&arguments); + if (end == NULL) { + PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc); } else { - call->base.location.end = arguments.closing_loc.end; + PM_NODE_LENGTH_SET_LOCATION(call, end); } } } else { @@ -18564,19 +19388,19 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // can still be a method call if it is followed by arguments or // a block, so we need to check for that here. if ( - (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || + ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) || match1(parser, PM_TOKEN_BRACE_LEFT) ) { pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments); if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { // If we're about to convert an 'it' implicit local // variable read into a method call, we need to remove // it from the list of implicit local variables. - parse_target_implicit_parameter(parser, node); + pm_node_unreference(parser, node); } else { // Otherwise, we're about to convert a regular local // variable read into a method call, in which case we @@ -18584,16 +19408,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // purposes of warnings. assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)); - if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) { - parse_target_implicit_parameter(parser, node); + if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) { + pm_node_unreference(parser, node); } else { pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name); } } - pm_node_destroy(parser, node); - return (pm_node_t *) fcall; + return UP(fcall); } } @@ -18625,12 +19448,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t content = parse_strings_empty_content(parser->previous.start); if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { - node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY); + node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); } else { - node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY); + node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); } - node->location.end = opening.end; + PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening); } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) { // If we get here, then we tried to find something in the // heredoc but couldn't actually parse anything, so we'll just @@ -18638,7 +19461,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // // parse_string_part handles its own errors, so there is no need // for us to add one here. - node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); + node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { // If we get here, then the part that we parsed was plain string // content and we're at the end of the heredoc, so we can return @@ -18647,8 +19470,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_flag_set(part, parse_unescaped_encoding(parser)); pm_string_node_t *cast = (pm_string_node_t *) part; - cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening); - cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current); + cast->opening_loc = TOK2LOC(parser, &opening); + cast->closing_loc = TOK2LOC(parser, &parser->current); cast->base.location = cast->opening_loc; if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { @@ -18657,21 +19480,21 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) { - parse_heredoc_dedent_string(&cast->unescaped, common_whitespace); + parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace); } - node = (pm_node_t *) cast; + node = UP(cast); expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); } else { // If we get here, then we have multiple parts in the heredoc, // so we'll need to create an interpolated string node to hold // them all. pm_node_list_t parts = { 0 }; - pm_node_list_append(&parts, part); + pm_node_list_append(parser->arena, &parts, part); while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_node_list_append(&parts, part); + pm_node_list_append(parser->arena, &parts, part); } } @@ -18682,19 +19505,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b cast->parts = parts; expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_xstring_node_closing_set(cast, &parser->previous); + pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous); cast->base.location = cast->opening_loc; - node = (pm_node_t *) cast; + node = UP(cast); } else { pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening); - pm_node_list_free(&parts); expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_string_node_closing_set(cast, &parser->previous); + pm_interpolated_string_node_closing_set(parser, cast, &parser->previous); cast->base.location = cast->opening_loc; - node = (pm_node_t *) cast; + node = UP(cast); } // If this is a heredoc that is indented with a ~, then we need @@ -18719,7 +19541,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } case PM_TOKEN_INSTANCE_VARIABLE: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous)); if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -18728,34 +19550,34 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return node; } case PM_TOKEN_INTEGER: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); - return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous); + return UP(pm_integer_node_create(parser, base, &parser->previous)); } case PM_TOKEN_INTEGER_IMAGINARY: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); - return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous); + return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous)); } case PM_TOKEN_INTEGER_RATIONAL: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); - return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous); + return UP(pm_integer_node_rational_create(parser, base, &parser->previous)); } case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: { - pm_node_flags_t base = parser->integer_base; + pm_node_flags_t base = parser->integer.base; parser_lex(parser); - return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous); + return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous)); } case PM_TOKEN_KEYWORD___ENCODING__: parser_lex(parser); - return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous); + return UP(pm_source_encoding_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD___FILE__: parser_lex(parser); - return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous); + return UP(pm_source_file_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD___LINE__: parser_lex(parser); - return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous); + return UP(pm_source_line_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD_ALIAS: { if (binding_power != PM_BINDING_POWER_STATEMENT) { pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS); @@ -18775,245 +19597,27 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) { pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE); } - } else { + } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) { pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); + old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name)); } - return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name); + return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name)); } case PM_SYMBOL_NODE: case PM_INTERPOLATED_SYMBOL_NODE: { - if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) { + if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) { pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); + old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name)); } } PRISM_FALLTHROUGH default: - return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name); - } - } - case PM_TOKEN_KEYWORD_CASE: { - size_t opening_newline_index = token_newline_index(parser); - parser_lex(parser); - - pm_token_t case_keyword = parser->previous; - pm_node_t *predicate = NULL; - - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); - predicate = NULL; - } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) { - predicate = NULL; - } else if (!token_begins_expression_p(parser->current.type)) { - predicate = NULL; - } else { - predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1)); - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); - } - - if (match1(parser, PM_TOKEN_KEYWORD_END)) { - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); - parser_lex(parser); - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); - return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous); - } - - // At this point we can create a case node, though we don't yet know - // if it is a case-in or case-when node. - pm_token_t end_keyword = not_provided(parser); - pm_node_t *node; - - if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { - pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword); - pm_static_literals_t literals = { 0 }; - - // At this point we've seen a when keyword, so we know this is a - // case-when node. We will continue to parse the when nodes - // until we hit the end of the list. - while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); - parser_lex(parser); - - pm_token_t when_keyword = parser->previous; - pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword); - - do { - if (accept1(parser, PM_TOKEN_USTAR)) { - pm_token_t operator = parser->previous; - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); - - pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression); - pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node); - - if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break; - } else { - pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1)); - pm_when_node_conditions_append(when_node, condition); - - // If we found a missing node, then this is a syntax - // error and we should stop looping. - if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break; - - // If this is a string node, then we need to mark it - // as frozen because when clause strings are frozen. - if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) { - pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL); - } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) { - pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL); - } - - pm_when_clause_static_literals_add(parser, &literals, condition); - } - } while (accept1(parser, PM_TOKEN_COMMA)); - - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - pm_when_node_then_keyword_loc_set(when_node, &parser->previous); - } - } else { - expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER); - pm_when_node_then_keyword_loc_set(when_node, &parser->previous); - } - - if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1)); - if (statements != NULL) { - pm_when_node_statements_set(when_node, statements); - } - } - - pm_case_node_condition_append(case_node, (pm_node_t *) when_node); - } - - // If we didn't parse any conditions (in or when) then we need - // to indicate that we have an error. - if (case_node->conditions.size == 0) { - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); - } - - pm_static_literals_free(&literals); - node = (pm_node_t *) case_node; - } else { - pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword); - - // If this is a case-match node (i.e., it is a pattern matching - // case statement) then we must have a predicate. - if (predicate == NULL) { - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE); - } - - // At this point we expect that we're parsing a case-in node. We - // will continue to parse the in nodes until we hit the end of - // the list. - while (match1(parser, PM_TOKEN_KEYWORD_IN)) { - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); - - bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; - parser->pattern_matching_newlines = true; - - lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); - parser->command_start = false; - parser_lex(parser); - - pm_token_t in_keyword = parser->previous; - - pm_constant_id_list_t captures = { 0 }; - pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1)); - - parser->pattern_matching_newlines = previous_pattern_matching_newlines; - pm_constant_id_list_free(&captures); - - // Since we're in the top-level of the case-in node we need - // to check for guard clauses in the form of `if` or - // `unless` statements. - if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) { - pm_token_t keyword = parser->previous; - pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); - pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate); - } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) { - pm_token_t keyword = parser->previous; - pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); - pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate); - } - - // Now we need to check for the terminator of the in node's - // pattern. It can be a newline or semicolon optionally - // followed by a `then` keyword. - pm_token_t then_keyword; - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - then_keyword = parser->previous; - } else { - then_keyword = not_provided(parser); - } - } else { - expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER); - then_keyword = parser->previous; - } - - // Now we can actually parse the statements associated with - // the in node. - pm_statements_node_t *statements; - if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - statements = NULL; - } else { - statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1)); - } - - // Now that we have the full pattern and statements, we can - // create the node and attach it to the case node. - pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword); - pm_case_match_node_condition_append(case_node, condition); - } - - // If we didn't parse any conditions (in or when) then we need - // to indicate that we have an error. - if (case_node->conditions.size == 0) { - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); - } - - node = (pm_node_t *) case_node; - } - - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) { - pm_token_t else_keyword = parser->previous; - pm_else_node_t *else_node; - - if (!match1(parser, PM_TOKEN_KEYWORD_END)) { - else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current); - } else { - else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current); - } - - if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { - pm_case_node_else_clause_set((pm_case_node_t *) node, else_node); - } else { - pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node); - } - } - - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM); - - if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { - pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous); - } else { - pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous); + return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name)); } - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return node; } + case PM_TOKEN_KEYWORD_CASE: + return parse_case(parser, flags, depth); case PM_TOKEN_KEYWORD_BEGIN: { size_t opening_newline_index = token_newline_index(parser); parser_lex(parser); @@ -19034,15 +19638,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements); parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1)); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM); - - begin_node->base.location.end = parser->previous.end; - pm_begin_node_end_keyword_set(begin_node, &parser->previous); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword); + PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous); + pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous); pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) begin_node; + return UP(begin_node); } case PM_TOKEN_KEYWORD_BEGIN_UPCASE: { pm_node_list_t current_block_exits = { 0 }; @@ -19059,16 +19660,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t opening = parser->previous; pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1)); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening); pm_context_t context = parser->current_context->context; if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) { pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL); } flush_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous); + return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous)); } case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_NEXT: @@ -19085,29 +19684,44 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left; if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) { - parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1)); + pm_token_t next = parser->current; + parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1)); + + // Reject `foo && return bar`. + if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type)); + } + } + + // It's possible that we've parsed a block argument through our + // call to parse_arguments. If we found one, we should mark it + // as invalid and destroy it, as we don't have a place for it. + if (arguments.block != NULL) { + pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT); + pm_node_unreference(parser, arguments.block); + arguments.block = NULL; } } switch (keyword.type) { case PM_TOKEN_KEYWORD_BREAK: { - pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments); + pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments)); if (!parser->partial_script) parse_block_exit(parser, node); return node; } case PM_TOKEN_KEYWORD_NEXT: { - pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments); + pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments)); if (!parser->partial_script) parse_block_exit(parser, node); return node; } case PM_TOKEN_KEYWORD_RETURN: { - pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments); + pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments)); parse_return(parser, node); return node; } default: assert(false && "unreachable"); - return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } } case PM_TOKEN_KEYWORD_SUPER: { @@ -19115,24 +19729,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t keyword = parser->previous; pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); if ( - arguments.opening_loc.start == NULL && + arguments.opening_loc.length == 0 && arguments.arguments == NULL && ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE)) ) { - return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments); + return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments)); } - return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments); + return UP(pm_super_node_create(parser, &keyword, &arguments)); } case PM_TOKEN_KEYWORD_YIELD: { parser_lex(parser); pm_token_t keyword = parser->previous; pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1)); + parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1)); // It's possible that we've parsed a block argument through our // call to parse_arguments_list. If we found one, we should mark it @@ -19140,462 +19754,57 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // yield node. if (arguments.block != NULL) { pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT); - pm_node_destroy(parser, arguments.block); + pm_node_unreference(parser, arguments.block); arguments.block = NULL; } - pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc); + pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc)); if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node); return node; } - case PM_TOKEN_KEYWORD_CLASS: { - size_t opening_newline_index = token_newline_index(parser); - parser_lex(parser); - - pm_token_t class_keyword = parser->previous; - pm_do_loop_stack_push(parser, false); - - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - if (accept1(parser, PM_TOKEN_LESS_LESS)) { - pm_token_t operator = parser->previous; - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1)); - - pm_parser_scope_push(parser, true); - if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type)); - } - - pm_node_t *statements = NULL; - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); - } - - if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); - } - - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM); - - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - - pm_parser_scope_pop(parser); - pm_do_loop_stack_pop(parser); - - flush_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous); - } - - pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1)); - pm_token_t name = parser->previous; - if (name.type != PM_TOKEN_CONSTANT) { - pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME); - } - - pm_token_t inheritance_operator; - pm_node_t *superclass; - - if (match1(parser, PM_TOKEN_LESS)) { - inheritance_operator = parser->current; - lex_state_set(parser, PM_LEX_STATE_BEG); - - parser->command_start = true; - parser_lex(parser); - - superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1)); - } else { - inheritance_operator = not_provided(parser); - superclass = NULL; - } - - pm_parser_scope_push(parser, true); - - if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) { - expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END); - } else { - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - } - pm_node_t *statements = NULL; - - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); - } - - if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); - } - - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM); - - if (context_def_p(parser)) { - pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD); - } - - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - - pm_parser_scope_pop(parser); - pm_do_loop_stack_pop(parser); - - if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { - pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME); - } - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous); - } - case PM_TOKEN_KEYWORD_DEF: { - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - pm_token_t def_keyword = parser->current; - size_t opening_newline_index = token_newline_index(parser); - - pm_node_t *receiver = NULL; - pm_token_t operator = not_provided(parser); - pm_token_t name; - - // This context is necessary for lexing `...` in a bare params - // correctly. It must be pushed before lexing the first param, so it - // is here. - context_push(parser, PM_CONTEXT_DEF_PARAMS); - parser_lex(parser); - - // This will be false if the method name is not a valid identifier - // but could be followed by an operator. - bool valid_name = true; - - switch (parser->current.type) { - case PM_CASE_OPERATOR: - pm_parser_scope_push(parser, true); - lex_state_set(parser, PM_LEX_STATE_ENDFN); - parser_lex(parser); - - name = parser->previous; - break; - case PM_TOKEN_IDENTIFIER: { - parser_lex(parser); - - if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { - receiver = parse_variable_call(parser); - - pm_parser_scope_push(parser, true); - lex_state_set(parser, PM_LEX_STATE_FNAME); - parser_lex(parser); - - operator = parser->previous; - name = parse_method_definition_name(parser); - } else { - pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end); - pm_parser_scope_push(parser, true); - - name = parser->previous; - } - - break; - } - case PM_TOKEN_INSTANCE_VARIABLE: - case PM_TOKEN_CLASS_VARIABLE: - case PM_TOKEN_GLOBAL_VARIABLE: - valid_name = false; - PRISM_FALLTHROUGH - case PM_TOKEN_CONSTANT: - case PM_TOKEN_KEYWORD_NIL: - case PM_TOKEN_KEYWORD_SELF: - case PM_TOKEN_KEYWORD_TRUE: - case PM_TOKEN_KEYWORD_FALSE: - case PM_TOKEN_KEYWORD___FILE__: - case PM_TOKEN_KEYWORD___LINE__: - case PM_TOKEN_KEYWORD___ENCODING__: { - pm_parser_scope_push(parser, true); - parser_lex(parser); - - pm_token_t identifier = parser->previous; - - if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { - lex_state_set(parser, PM_LEX_STATE_FNAME); - parser_lex(parser); - operator = parser->previous; - - switch (identifier.type) { - case PM_TOKEN_CONSTANT: - receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier); - break; - case PM_TOKEN_INSTANCE_VARIABLE: - receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier); - break; - case PM_TOKEN_CLASS_VARIABLE: - receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier); - break; - case PM_TOKEN_GLOBAL_VARIABLE: - receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD_NIL: - receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD_SELF: - receiver = (pm_node_t *) pm_self_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD_TRUE: - receiver = (pm_node_t *) pm_true_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD_FALSE: - receiver = (pm_node_t *) pm_false_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD___FILE__: - receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD___LINE__: - receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier); - break; - case PM_TOKEN_KEYWORD___ENCODING__: - receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier); - break; - default: - break; - } - - name = parse_method_definition_name(parser); - } else { - if (!valid_name) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type)); - } - - name = identifier; - } - break; - } - case PM_TOKEN_PARENTHESIS_LEFT: { - // The current context is `PM_CONTEXT_DEF_PARAMS`, however - // the inner expression of this parenthesis should not be - // processed under this context. Thus, the context is popped - // here. - context_pop(parser); - parser_lex(parser); - - pm_token_t lparen = parser->previous; - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1)); - - accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pm_token_t rparen = parser->previous; - - lex_state_set(parser, PM_LEX_STATE_FNAME); - expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM); - - operator = parser->previous; - receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen); - - // To push `PM_CONTEXT_DEF_PARAMS` again is for the same - // reason as described the above. - pm_parser_scope_push(parser, true); - context_push(parser, PM_CONTEXT_DEF_PARAMS); - name = parse_method_definition_name(parser); - break; - } - default: - pm_parser_scope_push(parser, true); - name = parse_method_definition_name(parser); - break; - } - - pm_token_t lparen; - pm_token_t rparen; - pm_parameters_node_t *params; - - switch (parser->current.type) { - case PM_TOKEN_PARENTHESIS_LEFT: { - parser_lex(parser); - lparen = parser->previous; - - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - params = NULL; - } else { - params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1)); - } - - lex_state_set(parser, PM_LEX_STATE_BEG); - parser->command_start = true; - - context_pop(parser); - if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type)); - parser->previous.start = parser->previous.end; - parser->previous.type = PM_TOKEN_MISSING; - } - - rparen = parser->previous; - break; - } - case PM_CASE_PARAMETER: { - // If we're about to lex a label, we need to add the label - // state to make sure the next newline is ignored. - if (parser->current.type == PM_TOKEN_LABEL) { - lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL); - } - - lparen = not_provided(parser); - rparen = not_provided(parser); - params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1)); - - context_pop(parser); - break; - } - default: { - lparen = not_provided(parser); - rparen = not_provided(parser); - params = NULL; - - context_pop(parser); - break; - } - } - - pm_node_t *statements = NULL; - pm_token_t equal; - pm_token_t end_keyword; - - if (accept1(parser, PM_TOKEN_EQUAL)) { - if (token_is_setter_name(&name)) { - pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER); - } - equal = parser->previous; - - context_push(parser, PM_CONTEXT_DEF); - pm_do_loop_stack_push(parser, false); - statements = (pm_node_t *) pm_statements_node_create(parser); - - pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1)); - - if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) { - context_push(parser, PM_CONTEXT_RESCUE_MODIFIER); - - pm_token_t rescue_keyword = parser->previous; - pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); - context_pop(parser); - - statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value); - } - - pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); - pm_do_loop_stack_pop(parser); - context_pop(parser); - end_keyword = not_provided(parser); - } else { - equal = not_provided(parser); - - if (lparen.type == PM_TOKEN_NOT_PROVIDED) { - lex_state_set(parser, PM_LEX_STATE_BEG); - parser->command_start = true; - expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM); - } else { - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - } - - pm_accepts_block_stack_push(parser, true); - pm_do_loop_stack_push(parser, false); - - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); - } - - if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false); - } - - pm_accepts_block_stack_pop(parser); - pm_do_loop_stack_pop(parser); - - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM); - end_keyword = parser->previous; - } - - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - pm_parser_scope_pop(parser); - - /** - * If the final character is @. As is the case when defining - * methods to override the unary operators, we should ignore - * the @ in the same way we do for symbols. - */ - pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name)); - - flush_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_def_node_create( - parser, - name_id, - &name, - receiver, - params, - statements, - &locals, - &def_keyword, - &operator, - &lparen, - &rparen, - &equal, - &end_keyword - ); - } + case PM_TOKEN_KEYWORD_CLASS: + return parse_class(parser, flags, depth); + case PM_TOKEN_KEYWORD_DEF: + return parse_def(parser, binding_power, flags, depth); case PM_TOKEN_KEYWORD_DEFINED: { parser_lex(parser); - pm_token_t keyword = parser->previous; - pm_token_t lparen; - pm_token_t rparen; + pm_token_t keyword = parser->previous; + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; pm_node_t *expression; + context_push(parser, PM_CONTEXT_DEFINED); + bool newline = accept1(parser, PM_TOKEN_NEWLINE); if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { lparen = parser->previous; - expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); - if (parser->recovering) { - rparen = not_provided(parser); + if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0)); + lparen = (pm_token_t) { 0 }; } else { - accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - rparen = parser->previous; + expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); + + if (!parser->recovering) { + accept1(parser, PM_TOKEN_NEWLINE); + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + rparen = parser->previous; + } } } else { - lparen = not_provided(parser); - rparen = not_provided(parser); - expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); + expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1)); } context_pop(parser); - return (pm_node_t *) pm_defined_node_create( + return UP(pm_defined_node_create( parser, - &lparen, + NTOK2PTR(lparen), expression, - &rparen, - &PM_LOCATION_TOKEN_VALUE(&keyword) - ); + NTOK2PTR(rparen), + &keyword + )); } case PM_TOKEN_KEYWORD_END_UPCASE: { if (binding_power != PM_BINDING_POWER_STATEMENT) { @@ -19613,12 +19822,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t opening = parser->previous; pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1)); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM); - return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening); + return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous)); } case PM_TOKEN_KEYWORD_FALSE: parser_lex(parser); - return (pm_node_t *) pm_false_node_create(parser, &parser->previous); + return UP(pm_false_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD_FOR: { size_t opening_newline_index = token_newline_index(parser); parser_lex(parser); @@ -19634,15 +19843,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *name = NULL; if (token_begins_expression_p(parser->current.type)) { - name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); } - index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name); + index = UP(pm_splat_node_create(parser, &star_operator, name)); } else if (token_begins_expression_p(parser->current.type)) { - index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); + index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1)); } else { pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX); - index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end); + index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword))); } // Now, if there are multiple index expressions, parse them out. @@ -19658,16 +19867,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN); pm_token_t in_keyword = parser->previous; - pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1)); + pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1)); pm_do_loop_stack_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type)); } } @@ -19677,13 +19885,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword); - return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous); + return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous)); } case PM_TOKEN_KEYWORD_IF: if (parser_end_of_line_p(parser)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL); } size_t opening_newline_index = token_newline_index(parser); @@ -19700,26 +19908,24 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous); pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1)); - if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) { - pm_node_destroy(parser, name); + if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) { } else { - pm_undef_node_append(undef, name); + pm_undef_node_append(parser->arena, undef, name); while (match1(parser, PM_TOKEN_COMMA)) { lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM); parser_lex(parser); name = parse_undef_argument(parser, (uint16_t) (depth + 1)); - if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) { - pm_node_destroy(parser, name); + if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) { break; } - pm_undef_node_append(undef, name); + pm_undef_node_append(parser->arena, undef, name); } } - return (pm_node_t *) undef; + return UP(undef); } case PM_TOKEN_KEYWORD_NOT: { parser_lex(parser); @@ -19728,28 +19934,46 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_arguments_t arguments = { 0 }; pm_node_t *receiver = NULL; + // The `not` keyword without parentheses is only valid in contexts + // where it would be parsed as an expression (i.e., at or below + // the `not` binding power level). In other contexts (e.g., method + // arguments, array elements, assignment right-hand sides), + // parentheses are required: `not(x)`. An exception is made for + // endless def bodies, where `not` is valid as both `arg` and + // `command` (e.g., `def f = not 1`, `def f = not foo bar`). + if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { + if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) { + pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN); + } else { + accept1(parser, PM_TOKEN_NEWLINE); + pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER); + } + + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); + } + accept1(parser, PM_TOKEN_NEWLINE); if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { pm_token_t lparen = parser->previous; if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous); + receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0)); } else { - arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&lparen); - receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1)); + arguments.opening_loc = TOK2LOC(parser, &lparen); + receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1)); if (!parser->recovering) { accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.closing_loc = TOK2LOC(parser, &parser->previous); } } } else { - receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1)); + receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1)); } - return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments); + return UP(pm_call_node_not_create(parser, receiver, &message, &arguments)); } case PM_TOKEN_KEYWORD_UNLESS: { size_t opening_newline_index = token_newline_index(parser); @@ -19757,81 +19981,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1)); } - case PM_TOKEN_KEYWORD_MODULE: { - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - size_t opening_newline_index = token_newline_index(parser); - parser_lex(parser); - pm_token_t module_keyword = parser->previous; - - pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1)); - pm_token_t name; - - // If we can recover from a syntax error that occurred while parsing - // the name of the module, then we'll handle that here. - if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) { - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing); - } - - while (accept1(parser, PM_TOKEN_COLON_COLON)) { - pm_token_t double_colon = parser->previous; - - expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); - constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous); - } - - // Here we retrieve the name of the module. If it wasn't a constant, - // then it's possible that `module foo` was passed, which is a - // syntax error. We handle that here as well. - name = parser->previous; - if (name.type != PM_TOKEN_CONSTANT) { - pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME); - } - - pm_parser_scope_push(parser, true); - accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE); - pm_node_t *statements = NULL; - - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); - } - - if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false); - } - - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - - pm_parser_scope_pop(parser); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM); - - if (context_def_p(parser)) { - pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD); - } - - pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous); - } + case PM_TOKEN_KEYWORD_MODULE: + return parse_module(parser, flags, depth); case PM_TOKEN_KEYWORD_NIL: parser_lex(parser); - return (pm_node_t *) pm_nil_node_create(parser, &parser->previous); + return UP(pm_nil_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD_REDO: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous)); if (!parser->partial_script) parse_block_exit(parser, node); return node; @@ -19839,17 +19997,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b case PM_TOKEN_KEYWORD_RETRY: { parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous); + pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous)); parse_retry(parser, node); return node; } case PM_TOKEN_KEYWORD_SELF: parser_lex(parser); - return (pm_node_t *) pm_self_node_create(parser, &parser->previous); + return UP(pm_self_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD_TRUE: parser_lex(parser); - return (pm_node_t *) pm_true_node_create(parser, &parser->previous); + return UP(pm_true_node_create(parser, &parser->previous)); case PM_TOKEN_KEYWORD_UNTIL: { size_t opening_newline_index = token_newline_index(parser); @@ -19858,16 +20016,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t keyword = parser->previous; - pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1)); + pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1)); pm_do_loop_stack_pop(parser); context_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE); } @@ -19880,9 +20037,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword); - return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0); + return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0)); } case PM_TOKEN_KEYWORD_WHILE: { size_t opening_newline_index = token_newline_index(parser); @@ -19892,16 +20049,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t keyword = parser->previous; - pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1)); + pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1)); pm_do_loop_stack_pop(parser); context_pop(parser); - pm_token_t do_keyword; + pm_token_t do_keyword = { 0 }; if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) { do_keyword = parser->previous; } else { - do_keyword = not_provided(parser); expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE); } @@ -19914,381 +20070,122 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword); - return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0); + return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0)); } case PM_TOKEN_PERCENT_LOWER_I: { parser_lex(parser); pm_token_t opening = parser->previous; pm_array_node_t *array = pm_array_node_create(parser, &opening); + pm_node_t *current = NULL; while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { accept1(parser, PM_TOKEN_WORDS_SEP); if (match1(parser, PM_TOKEN_STRING_END)) break; - if (match1(parser, PM_TOKEN_STRING_CONTENT)) { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)); - } - - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT); - } - - pm_token_t closing = parser->current; - if (match1(parser, PM_TOKEN_EOF)) { - pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - } else { - expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM); - } - pm_array_node_close_set(array, &closing); - - return (pm_node_t *) array; - } - case PM_TOKEN_PERCENT_UPPER_I: { - parser_lex(parser); - pm_token_t opening = parser->previous; - pm_array_node_t *array = pm_array_node_create(parser, &opening); - - // This is the current node that we are parsing that will be added to the - // list of elements. - pm_node_t *current = NULL; - - while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { - switch (parser->current.type) { - case PM_TOKEN_WORDS_SEP: { - if (current == NULL) { - // If we hit a separator before we have any content, then we don't - // need to do anything. - } else { - // If we hit a separator after we've hit content, then we need to - // append that content to the list and reset the current node. - pm_array_node_elements_append(array, current); - current = NULL; - } - + // Interpolation is not possible but nested heredocs can still lead to + // consecutive (disjoint) string tokens when the final newline is escaped. + while (match1(parser, PM_TOKEN_STRING_CONTENT)) { + // Record the string node, moving to interpolation if needed. + if (current == NULL) { + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); + parser_lex(parser); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + parser_lex(parser); + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + pm_symbol_node_t *cast = (pm_symbol_node_t *) current; + pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length }; + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); parser_lex(parser); - break; - } - case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - if (current == NULL) { - // If we hit content and the current node is NULL, then this is - // the first string content we've seen. In that case we're going - // to create a new string node and set that to the current. - current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing); - parser_lex(parser); - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { - // If we hit string content and the current node is an - // interpolated string, then we need to append the string content - // to the list of child nodes. - pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); - parser_lex(parser); - - pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); - } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { - // If we hit string content and the current node is a symbol node, - // then we need to convert the current node into an interpolated - // string and add the string content to the list of child nodes. - pm_symbol_node_t *cast = (pm_symbol_node_t *) current; - pm_token_t bounds = not_provided(parser); - - pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end }; - pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped); - pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing); - parser_lex(parser); - - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); - pm_interpolated_symbol_node_append(interpolated, first_string); - pm_interpolated_symbol_node_append(interpolated, second_string); - - xfree(current); - current = (pm_node_t *) interpolated; - } else { - assert(false && "unreachable"); - } - - break; - } - case PM_TOKEN_EMBVAR: { - bool start_location_set = false; - if (current == NULL) { - // If we hit an embedded variable and the current node is NULL, - // then this is the start of a new string. We'll set the current - // node to a new interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); - } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { - // If we hit an embedded variable and the current node is a string - // node, then we'll convert the current into an interpolated - // string and add the string node to the list of parts. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); - - current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current); - pm_interpolated_symbol_node_append(interpolated, current); - interpolated->base.location.start = current->location.start; - start_location_set = true; - current = (pm_node_t *) interpolated; - } else { - // If we hit an embedded variable and the current node is an - // interpolated string, then we'll just add the embedded variable. - } - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part); - if (!start_location_set) { - current->location.start = part->location.start; - } - break; - } - case PM_TOKEN_EMBEXPR_BEGIN: { - bool start_location_set = false; - if (current == NULL) { - // If we hit an embedded expression and the current node is NULL, - // then this is the start of a new string. We'll set the current - // node to a new interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); - } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { - // If we hit an embedded expression and the current node is a - // string node, then we'll convert the current into an - // interpolated string and add the string node to the list of - // parts. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); - - current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current); - pm_interpolated_symbol_node_append(interpolated, current); - interpolated->base.location.start = current->location.start; - start_location_set = true; - current = (pm_node_t *) interpolated; - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { - // If we hit an embedded expression and the current node is an - // interpolated string, then we'll just continue on. - } else { - assert(false && "unreachable"); - } + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string); + pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string); - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part); - if (!start_location_set) { - current->location.start = part->location.start; - } - break; + // current is arena-allocated so no explicit free is needed. + current = UP(interpolated); + } else { + assert(false && "unreachable"); } - default: - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT); - parser_lex(parser); - break; } - } - // If we have a current node, then we need to append it to the list. - if (current) { - pm_array_node_elements_append(array, current); + if (current) { + pm_array_node_elements_append(parser->arena, array, current); + current = NULL; + } else { + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT); + } } pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { - pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM); + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { - expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM); + expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM); } - pm_array_node_close_set(array, &closing); + pm_array_node_close_set(parser, array, &closing); - return (pm_node_t *) array; + return UP(array); } + case PM_TOKEN_PERCENT_UPPER_I: + return parse_symbol_array(parser, depth); case PM_TOKEN_PERCENT_LOWER_W: { parser_lex(parser); pm_token_t opening = parser->previous; pm_array_node_t *array = pm_array_node_create(parser, &opening); - - // skip all leading whitespaces - accept1(parser, PM_TOKEN_WORDS_SEP); + pm_node_t *current = NULL; while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { accept1(parser, PM_TOKEN_WORDS_SEP); if (match1(parser, PM_TOKEN_STRING_END)) break; - if (match1(parser, PM_TOKEN_STRING_CONTENT)) { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); - pm_array_node_elements_append(array, string); + // Interpolation is not possible but nested heredocs can still lead to + // consecutive (disjoint) string tokens when the final newline is escaped. + while (match1(parser, PM_TOKEN_STRING_CONTENT)) { + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + + // Record the string node, moving to interpolation if needed. + if (current == NULL) { + current = string; + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + pm_interpolated_string_node_append(parser, interpolated, string); + current = UP(interpolated); + } else { + assert(false && "unreachable"); + } + parser_lex(parser); } - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); + if (current) { + pm_array_node_elements_append(parser->arena, array, current); + current = NULL; + } else { + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); + } } pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM); } - pm_array_node_close_set(array, &closing); - return (pm_node_t *) array; - } - case PM_TOKEN_PERCENT_UPPER_W: { - parser_lex(parser); - pm_token_t opening = parser->previous; - pm_array_node_t *array = pm_array_node_create(parser, &opening); - - // This is the current node that we are parsing that will be added - // to the list of elements. - pm_node_t *current = NULL; - - while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { - switch (parser->current.type) { - case PM_TOKEN_WORDS_SEP: { - // Reset the explicit encoding if we hit a separator - // since each element can have its own encoding. - parser->explicit_encoding = NULL; - - if (current == NULL) { - // If we hit a separator before we have any content, - // then we don't need to do anything. - } else { - // If we hit a separator after we've hit content, - // then we need to append that content to the list - // and reset the current node. - pm_array_node_elements_append(array, current); - current = NULL; - } - - parser_lex(parser); - break; - } - case PM_TOKEN_STRING_CONTENT: { - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing); - pm_node_flag_set(string, parse_unescaped_encoding(parser)); - parser_lex(parser); - - if (current == NULL) { - // If we hit content and the current node is NULL, - // then this is the first string content we've seen. - // In that case we're going to create a new string - // node and set that to the current. - current = string; - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { - // If we hit string content and the current node is - // an interpolated string, then we need to append - // the string content to the list of child nodes. - pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string); - } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - // If we hit string content and the current node is - // a string node, then we need to convert the - // current node into an interpolated string and add - // the string content to the list of child nodes. - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); - pm_interpolated_string_node_append(interpolated, current); - pm_interpolated_string_node_append(interpolated, string); - current = (pm_node_t *) interpolated; - } else { - assert(false && "unreachable"); - } - - break; - } - case PM_TOKEN_EMBVAR: { - if (current == NULL) { - // If we hit an embedded variable and the current - // node is NULL, then this is the start of a new - // string. We'll set the current node to a new - // interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing); - } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - // If we hit an embedded variable and the current - // node is a string node, then we'll convert the - // current into an interpolated string and add the - // string node to the list of parts. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); - pm_interpolated_string_node_append(interpolated, current); - current = (pm_node_t *) interpolated; - } else { - // If we hit an embedded variable and the current - // node is an interpolated string, then we'll just - // add the embedded variable. - } - - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part); - break; - } - case PM_TOKEN_EMBEXPR_BEGIN: { - if (current == NULL) { - // If we hit an embedded expression and the current - // node is NULL, then this is the start of a new - // string. We'll set the current node to a new - // interpolated string. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing); - } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - // If we hit an embedded expression and the current - // node is a string node, then we'll convert the - // current into an interpolated string and add the - // string node to the list of parts. - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); - pm_interpolated_string_node_append(interpolated, current); - current = (pm_node_t *) interpolated; - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { - // If we hit an embedded expression and the current - // node is an interpolated string, then we'll just - // continue on. - } else { - assert(false && "unreachable"); - } - - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part); - break; - } - default: - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT); - parser_lex(parser); - break; - } - } - - // If we have a current node, then we need to append it to the list. - if (current) { - pm_array_node_elements_append(array, current); - } - - pm_token_t closing = parser->current; - if (match1(parser, PM_TOKEN_EOF)) { - pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - } else { - expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM); - } - - pm_array_node_close_set(array, &closing); - return (pm_node_t *) array; + pm_array_node_close_set(parser, array, &closing); + return UP(array); } + case PM_TOKEN_PERCENT_UPPER_W: + return parse_string_array(parser, depth); case PM_TOKEN_REGEXP_BEGIN: { pm_token_t opening = parser->current; parser_lex(parser); @@ -20305,10 +20202,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); - pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous); - pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING); - - return node; + pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous); + pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL)); + return UP(node); } pm_interpolated_regular_expression_node_t *interpolated; @@ -20320,7 +20216,6 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // regular expression) or if it's not then it has interpolation. pm_string_t unescaped = parser->current_string; pm_token_t content = parser->current; - bool ascii_only = parser->current_regular_expression_ascii_only; parser_lex(parser); // If we hit an end, then we can create a regular expression @@ -20329,26 +20224,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b if (accept1(parser, PM_TOKEN_REGEXP_END)) { pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped); - // If we're not immediately followed by a =~, then we want - // to parse all of the errors at this point. If it is - // followed by a =~, then it will get parsed higher up while - // parsing the named captures as well. + // If we're not immediately followed by a =~, then we + // parse and validate now. If it is followed by a =~, + // then it will get parsed in the =~ handler where + // named captures can also be extracted. if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) { - parse_regular_expression_errors(parser, node); + pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL)); } - pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags)); - return (pm_node_t *) node; + return UP(node); } // If we get here, then we have interpolation so we'll need to create // a regular expression node with interpolation. interpolated = pm_interpolated_regular_expression_node_create(parser, &opening); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped); - + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) { // This is extremely strange, but the first string part of a // regular expression will always be tagged as binary if we @@ -20356,7 +20247,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING); } - pm_interpolated_regular_expression_node_append(interpolated, part); + pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part); } else { // If the first part of the body of the regular expression is not a // string content, then we have interpolation and we need to create an @@ -20369,20 +20260,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *part; while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) { if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_interpolated_regular_expression_node_append(interpolated, part); + pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part); } } pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM); } pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing); - return (pm_node_t *) interpolated; + return UP(interpolated); } case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: { @@ -20404,7 +20295,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b }; parser_lex(parser); - return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous); + return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous)); } pm_interpolated_x_string_node_t *node; @@ -20419,7 +20310,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); if (match1(parser, PM_TOKEN_STRING_END)) { - pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped); + pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped)); pm_node_flag_set(node, parse_unescaped_encoding(parser)); parser_lex(parser); return node; @@ -20429,13 +20320,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // create a string node with interpolation. node = pm_interpolated_xstring_node_create(parser, &opening, &opening); - pm_token_t opening = not_provided(parser); - pm_token_t closing = not_provided(parser); - - pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped); + pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped)); pm_node_flag_set(part, parse_unescaped_encoding(parser)); - pm_interpolated_xstring_node_append(node, part); + pm_interpolated_xstring_node_append(parser->arena, node, part); } else { // If the first part of the body of the string is not a string // content, then we have interpolation and we need to create an @@ -20446,20 +20334,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_node_t *part; while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_interpolated_xstring_node_append(node, part); + pm_interpolated_xstring_node_append(parser->arena, node, part); } } pm_token_t closing = parser->current; if (match1(parser, PM_TOKEN_EOF)) { pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM); - closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } else { expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM); } - pm_interpolated_xstring_node_closing_set(node, &closing); + pm_interpolated_xstring_node_closing_set(parser, node, &closing); - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_USTAR: { parser_lex(parser); @@ -20469,17 +20357,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // still lex past it though and create a missing node place. if (binding_power != PM_BINDING_POWER_STATEMENT) { pm_parser_err_prefix(parser, diag_id); - return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } pm_token_t operator = parser->previous; pm_node_t *name = NULL; if (token_begins_expression_p(parser->current.type)) { - name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); } - pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name); + pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name)); if (match1(parser, PM_TOKEN_COMMA)) { return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); @@ -20495,11 +20383,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t operator = parser->previous; - pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); + pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!"); pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT); - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_TILDE: { if (binding_power > PM_BINDING_POWER_UNARY) { @@ -20508,10 +20396,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t operator = parser->previous; - pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); + pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~"); - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_UMINUS: { if (binding_power > PM_BINDING_POWER_UNARY) { @@ -20520,22 +20408,22 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t operator = parser->previous; - pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); + pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@"); - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_UMINUS_NUM: { parser_lex(parser); pm_token_t operator = parser->previous; - pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); + pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); if (accept1(parser, PM_TOKEN_STAR_STAR)) { pm_token_t exponent_operator = parser->previous; - pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1)); - node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0); - node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@"); + pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1)); + node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0)); + node = UP(pm_call_node_unary_create(parser, &operator, node, "-@")); } else { switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: @@ -20545,7 +20433,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parse_negative_numeric(node); break; default: - node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@"); + node = UP(pm_call_node_unary_create(parser, &operator, node, "-@")); break; } } @@ -20579,13 +20467,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pm_block_parameters_node_closing_set(block_parameters, &parser->previous); + pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous); break; } case PM_CASE_PARAMETER: { pm_accepts_block_stack_push(parser, false); - pm_token_t opening = not_provided(parser); - block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1)); + block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); break; } @@ -20603,39 +20490,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b opening = parser->previous; if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) { - body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)); + body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1))); } parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening); } else { expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN); opening = parser->previous; if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { - pm_accepts_block_stack_push(parser, true); - body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); + body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1))); } if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)); - body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)); + body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1))); } else { parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false); } - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator); } pm_constant_id_list_t locals; pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser)); - pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous); + pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous); pm_parser_scope_pop(parser); pm_accepts_block_stack_pop(parser); - return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body); + return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body)); } case PM_TOKEN_UPLUS: { if (binding_power > PM_BINDING_POWER_UNARY) { @@ -20644,13 +20529,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t operator = parser->previous; - pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); + pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1)); pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@"); - return (pm_node_t *) node; + return UP(node); } case PM_TOKEN_STRING_BEGIN: - return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1)); + return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1)); case PM_TOKEN_SYMBOL_BEGIN: { pm_lex_mode_t lex_mode = *parser->lex_modes.current; parser_lex(parser); @@ -20673,17 +20558,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b // If we get here, then we are assuming this token is closing a // parent context, so we'll indicate that to the user so that // they know how we behaved. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable)); } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) { // We're going to make a special case here, because "cannot // parse expression" is pretty generic, and we know here that we // have an unexpected token. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type)); } else { pm_parser_err_prefix(parser, diag_id); } - return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); } } } @@ -20698,8 +20583,18 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b * or any of the binary operators that can be written to a variable. */ static pm_node_t * -parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) { - pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1)); +parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { + pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1)); + + // Assignments whose value is a command call (e.g., a = b c) can only + // be followed by modifiers (if/unless/while/until/rescue) and not by + // operators with higher binding power. If we find one, emit an error + // and skip the operator and its right-hand side. + if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + parser_lex(parser); + parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + } // Contradicting binding powers, the right-hand-side value of the assignment // allows the `rescue` modifier. @@ -20709,10 +20604,10 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_ pm_token_t rescue = parser->current; parser_lex(parser); - pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); + pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); context_pop(parser); - return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right); + return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right)); } return value; @@ -20767,35 +20662,46 @@ parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) { * operator that allows multiple values after it. */ static pm_node_t * -parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) { +parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { bool permitted = true; if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false; - pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1)); + pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1)); if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE); parse_assignment_value_local(parser, value); bool single_value = true; - if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) { + // Block calls (command call + do block, e.g., `foo bar do end`) cannot + // be followed by a comma to form a multi-value RHS because each element + // of a multi-value assignment must be an `arg`, not a `block_call`. + if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) { single_value = false; - pm_token_t opening = not_provided(parser); - pm_array_node_t *array = pm_array_node_create(parser, &opening); - - pm_array_node_elements_append(array, value); - value = (pm_node_t *) array; + pm_array_node_t *array = pm_array_node_create(parser, NULL); + pm_array_node_elements_append(parser->arena, array, value); + value = UP(array); while (accept1(parser, PM_TOKEN_COMMA)) { pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1)); - pm_array_node_elements_append(array, element); - if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break; + pm_array_node_elements_append(parser->arena, array, element); + if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break; parse_assignment_value_local(parser, element); } } + // Assignments whose value is a command call (e.g., a = b c) can only + // be followed by modifiers (if/unless/while/until/rescue) and not by + // operators with higher binding power. If we find one, emit an error + // and skip the operator and its right-hand side. + if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + parser_lex(parser); + parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + } + // Contradicting binding powers, the right-hand-side value of the assignment // allows the `rescue` modifier. if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) { @@ -20810,15 +20716,15 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding // but without parenthesis. if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) { pm_call_node_t *call_node = (pm_call_node_t *) value; - if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) { + if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) { accepts_command_call_inner = true; } } - pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); + pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); context_pop(parser); - return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right); + return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right)); } return value; @@ -20835,43 +20741,18 @@ static void parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) { if (call_node->arguments != NULL) { pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS); - pm_node_destroy(parser, (pm_node_t *) call_node->arguments); + pm_node_unreference(parser, UP(call_node->arguments)); call_node->arguments = NULL; } if (call_node->block != NULL) { pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK); - pm_node_destroy(parser, (pm_node_t *) call_node->block); + pm_node_unreference(parser, UP(call_node->block)); call_node->block = NULL; } } -/** - * This struct is used to pass information between the regular expression parser - * and the named capture callback. - */ -typedef struct { - /** The parser that is parsing the regular expression. */ - pm_parser_t *parser; - - /** The call node wrapping the regular expression node. */ - pm_call_node_t *call; - - /** The match write node that is being created. */ - pm_match_write_node_t *match; - - /** The list of names that have been parsed. */ - pm_constant_id_list_t names; - - /** - * Whether the content of the regular expression is shared. This impacts - * whether or not we used owned constants or shared constants in the - * constant pool for the names of the captures. - */ - bool shared; -} parse_regular_expression_named_capture_data_t; - -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) { cursor++; @@ -20892,7 +20773,7 @@ pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const return cursor; } -static inline const uint8_t * +static PRISM_INLINE const uint8_t * pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) { uint8_t value = (uint8_t) (*cursor - '0'); cursor++; @@ -20911,8 +20792,8 @@ pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, con return cursor; } -static inline const uint8_t * -pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) { +static PRISM_INLINE const uint8_t * +pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) { const uint8_t *start = cursor - 1; cursor++; @@ -20923,7 +20804,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con if (*cursor != '{') { size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4)); - uint32_t value = escape_unicode(parser, cursor, length); + uint32_t value = escape_unicode(parser, cursor, length, error_location, 0); if (!pm_buffer_append_unicode_codepoint(unescaped, value)) { pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start)); @@ -20943,7 +20824,10 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con } size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor); - uint32_t value = escape_unicode(parser, cursor, length); + if (length == 0) { + break; + } + uint32_t value = escape_unicode(parser, cursor, length, error_location, 0); (void) pm_buffer_append_unicode_codepoint(unescaped, value); cursor += length; @@ -20953,7 +20837,7 @@ pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, con } static void -pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor) { +pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) { const uint8_t *end = source + length; pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source)); @@ -20971,7 +20855,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8 cursor = pm_named_capture_escape_octal(unescaped, cursor, end); break; case 'u': - cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end); + cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location); break; default: pm_buffer_append_byte(unescaped, '\\'); @@ -20993,10 +20877,7 @@ pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8 * capture group. */ static void -parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { - parse_regular_expression_named_capture_data_t *callback_data = (parse_regular_expression_named_capture_data_t *) data; - - pm_parser_t *parser = callback_data->parser; +parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) { pm_call_node_t *call = callback_data->call; pm_constant_id_list_t *names = &callback_data->names; @@ -21014,55 +20895,56 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { // unescaped, which is what we need. const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding); if (PRISM_UNLIKELY(cursor != NULL)) { - pm_named_capture_escape(parser, &unescaped, source, length, cursor); + pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location); source = (const uint8_t *) pm_buffer_value(&unescaped); length = pm_buffer_length(&unescaped); } - pm_location_t location; + const uint8_t *start; + const uint8_t *end; pm_constant_id_t name; // If the name of the capture group isn't a valid identifier, we do // not add it to the local table. if (!pm_slice_is_valid_local(parser, source, source + length)) { - pm_buffer_free(&unescaped); + pm_buffer_cleanup(&unescaped); return; } - if (callback_data->shared) { + if (shared) { // If the unescaped string is a slice of the source, then we can // copy the names directly. The pointers will line up. - location = (pm_location_t) { .start = source, .end = source + length }; - name = pm_parser_constant_id_location(parser, location.start, location.end); + start = source; + end = source + length; + name = pm_parser_constant_id_raw(parser, start, end); } else { // Otherwise, the name is a slice of the malloc-ed owned string, // in which case we need to copy it out into a new string. - location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end }; - - void *memory = xmalloc(length); - if (memory == NULL) abort(); + start = parser->start + PM_NODE_START(call->receiver); + end = parser->start + PM_NODE_END(call->receiver); + uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1); memcpy(memory, source, length); - name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length); + name = pm_parser_constant_id_owned(parser, memory, length); } // Add this name to the list of constants if it is valid, not duplicated, // and not a keyword. if (name != 0 && !pm_constant_id_list_includes(names, name)) { - pm_constant_id_list_append(names, name); + pm_constant_id_list_append(parser->arena, names, name); int depth; if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) { // If the local is not already a local but it is a keyword, then we // do not want to add a capture for this. if (pm_local_is_keyword((const char *) source, length)) { - pm_buffer_free(&unescaped); + pm_buffer_cleanup(&unescaped); return; } // If the identifier is not already a local, then we will add it to // the local table. - pm_parser_local_add(parser, name, location.start, location.end, 0); + pm_parser_local_add(parser, name, start, end, 0); } // Here we lazily create the MatchWriteNode since we know we're @@ -21073,45 +20955,37 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) { // Next, create the local variable target and add it to the list of // targets for the match. - pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth); - pm_node_list_append(&callback_data->match->targets, target); + pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth)); + pm_node_list_append(parser->arena, &callback_data->match->targets, target); } - pm_buffer_free(&unescaped); + pm_buffer_cleanup(&unescaped); } /** - * Potentially change a =~ with a regular expression with named captures into a - * match write node. + * Potentially change a =~ with an interpolated regular expression with named + * captures into a match write node. This is for the interpolated case where + * we have concatenated content rather than a regular expression node. */ static pm_node_t * -parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) { - parse_regular_expression_named_capture_data_t callback_data = { - .parser = parser, +parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) { + pm_regexp_name_data_t callback_data = { .call = call, + .match = NULL, .names = { 0 }, - .shared = content->type == PM_STRING_SHARED }; - parse_regular_expression_error_data_t error_data = { - .parser = parser, - .start = call->receiver->location.start, - .end = call->receiver->location.end, - .shared = content->type == PM_STRING_SHARED - }; - - pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data); - pm_constant_id_list_free(&callback_data.names); + pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data); if (callback_data.match != NULL) { - return (pm_node_t *) callback_data.match; + return UP(callback_data.match); } else { - return (pm_node_t *) call; + return UP(call); } } -static inline pm_node_t * -parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) { +static PRISM_INLINE pm_node_t * +parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) { pm_token_t token = parser->current; switch (token.type) { @@ -21124,13 +20998,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // is parsed because it could be referenced in the value. pm_call_node_t *call_node = (pm_call_node_t *) node; if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0); + pm_parser_local_add_location(parser, &call_node->message_loc, 0); } } PRISM_FALLTHROUGH case PM_CASE_WRITABLE: { + // When we have `it = value`, we need to add `it` as a local + // variable before parsing the value, in case the value + // references the variable. + if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { + pm_parser_local_add_location(parser, &node->location, 0); + } + parser_lex(parser); - pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); + pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) { pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE); @@ -21143,8 +21024,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_multi_target_node_targets_append(parser, multi_target, node); parser_lex(parser); - pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); - return parse_write(parser, (pm_node_t *) multi_target, &token, value); + pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); + return parse_write(parser, UP(multi_target), &token, value); } case PM_SOURCE_ENCODING_NODE: case PM_FALSE_NODE: @@ -21156,7 +21037,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // In these special cases, we have specific error messages // and we will replace them with local variable writes. parser_lex(parser); - pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); + pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1)); return parse_unwriteable_write(parser, node, &token, value); } default: @@ -21177,71 +21058,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_GLOBAL_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_CLASS_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_CONSTANT_PATH_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value)); return parse_shareable_constant_write(parser, write); } case PM_CONSTANT_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return parse_shareable_constant_write(parser, write); } case PM_INSTANCE_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0)); - parse_target_implicit_parameter(parser, node); - pm_node_destroy(parser, node); + pm_node_unreference(parser, node); return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); - parse_target_implicit_parameter(parser, node); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start); + pm_node_unreference(parser, node); } pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth)); - pm_node_destroy(parser, node); return result; } case PM_CALL_NODE: { @@ -21251,16 +21126,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0)); - pm_node_destroy(parser, (pm_node_t *) cast); return result; } @@ -21272,8 +21144,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // this is an aref expression, and we can transform it into // an aset expression. if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) { - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + return UP(pm_index_and_write_node_create(parser, cast, &token, value)); } // If this node cannot be writable, then we have an error. @@ -21284,8 +21156,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t } parse_call_operator_write(parser, cast, &token); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1)); + return UP(pm_call_and_write_node_create(parser, cast, &token, value)); } case PM_MULTI_WRITE_NODE: { parser_lex(parser); @@ -21311,71 +21183,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_GLOBAL_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_CLASS_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_CONSTANT_PATH_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value)); return parse_shareable_constant_write(parser, write); } case PM_CONSTANT_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return parse_shareable_constant_write(parser, write); } case PM_INSTANCE_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0)); - parse_target_implicit_parameter(parser, node); - pm_node_destroy(parser, node); + pm_node_unreference(parser, node); return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); - parse_target_implicit_parameter(parser, node); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node)); + pm_node_unreference(parser, node); } pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth)); - pm_node_destroy(parser, node); return result; } case PM_CALL_NODE: { @@ -21385,16 +21251,13 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); - - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0)); - pm_node_destroy(parser, (pm_node_t *) cast); return result; } @@ -21406,8 +21269,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // this is an aref expression, and we can transform it into // an aset expression. if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) { - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + return UP(pm_index_or_write_node_create(parser, cast, &token, value)); } // If this node cannot be writable, then we have an error. @@ -21418,8 +21281,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t } parse_call_operator_write(parser, cast, &token); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1)); + return UP(pm_call_or_write_node_create(parser, cast, &token, value)); } case PM_MULTI_WRITE_NODE: { parser_lex(parser); @@ -21455,71 +21318,65 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_GLOBAL_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_CLASS_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_CONSTANT_PATH_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value)); return parse_shareable_constant_write(parser, write); } case PM_CONSTANT_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return parse_shareable_constant_write(parser, write); } case PM_INSTANCE_VARIABLE_READ_NODE: { parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value)); - pm_node_destroy(parser, node); return result; } case PM_IT_LOCAL_VARIABLE_READ_NODE: { pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2); parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0)); - parse_target_implicit_parameter(parser, node); - pm_node_destroy(parser, node); + pm_node_unreference(parser, node); return result; } case PM_LOCAL_VARIABLE_READ_NODE: { - if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) { - PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, node->location.start); - parse_target_implicit_parameter(parser, node); + if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) { + PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node)); + pm_node_unreference(parser, node); } pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; parser_lex(parser); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth)); - pm_node_destroy(parser, node); return result; } case PM_CALL_NODE: { @@ -21530,14 +21387,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // receiver that could have been a local variable) then we // will transform it into a local variable write. if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) { - pm_location_t *message_loc = &cast->message_loc; - pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end); + pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length); + pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0)); - pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0); - - pm_node_destroy(parser, (pm_node_t *) cast); return result; } @@ -21545,8 +21399,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // this is an aref expression, and we can transform it into // an aset expression. if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) { - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + return UP(pm_index_operator_write_node_create(parser, cast, &token, value)); } // If this node cannot be writable, then we have an error. @@ -21557,8 +21411,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t } parse_call_operator_write(parser, cast, &token); - pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value); + pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + return UP(pm_call_operator_write_node_create(parser, cast, &token, value)); } case PM_MULTI_WRITE_NODE: { parser_lex(parser); @@ -21571,7 +21425,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // In this case we have an operator but we don't know what it's for. // We need to treat it as an error. For now, we'll mark it as an error // and just skip right past it. - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type)); return node; } } @@ -21579,15 +21433,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_TOKEN_KEYWORD_AND: { parser_lex(parser); - pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_and_node_create(parser, node, &token, right); + pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + return UP(pm_and_node_create(parser, node, &token, right)); } case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_PIPE_PIPE: { parser_lex(parser); - pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_or_node_create(parser, node, &token, right); + pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + return UP(pm_or_node_create(parser, node, &token, right)); } case PM_TOKEN_EQUAL_TILDE: { // Note that we _must_ parse the value before adding the local @@ -21598,11 +21452,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // // In this case, `foo` should be a method call and not a local yet. parser_lex(parser); - pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); // By default, we're going to create a call node and then return it. pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0); - pm_node_t *result = (pm_node_t *) call; + pm_node_t *result = UP(call); // If the receiver of this =~ is a regular expression node, then we // need to introduce local variables for it based on its named @@ -21643,14 +21497,25 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_string_t owned; pm_string_owned_init(&owned, (uint8_t *) memory, total_length); - result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)); - pm_string_free(&owned); + result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)); + pm_string_cleanup(&owned); } } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) { - // If we have a regular expression node, then we can just parse - // the named captures directly off the unescaped string. - const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped; - result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED)); + // If we have a regular expression node, then we can parse + // the named captures and validate encoding in one pass. + pm_regular_expression_node_t *regexp = (pm_regular_expression_node_t *) node; + + pm_regexp_name_data_t name_data = { + .call = call, + .match = NULL, + .names = { 0 }, + }; + + pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data)); + + if (name_data.match != NULL) { + result = UP(name_data.match); + } } return result; @@ -21682,21 +21547,21 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } @@ -21704,20 +21569,20 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t break; } - pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0); + pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + return UP(pm_call_node_binary_create(parser, node, &token, argument, 0)); } case PM_TOKEN_GREATER: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_LESS: case PM_TOKEN_LESS_EQUAL: { if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) { - PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON); + PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON); } parser_lex(parser); - pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON); + pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON)); } case PM_TOKEN_AMPERSAND_DOT: case PM_TOKEN_DOT: { @@ -21728,28 +21593,28 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // This if statement handles the foo.() syntax. if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments); + return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments)); } switch (PM_NODE_TYPE(node)) { case PM_RESCUE_MODIFIER_NODE: { pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_AND_NODE: { pm_and_node_t *cast = (pm_and_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } case PM_OR_NODE: { pm_or_node_t *cast = (pm_or_node_t *) node; if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type)); } break; } @@ -21770,23 +21635,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t break; } default: { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type)); - message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type)); + message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; } } - parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments); if ( (previous_binding_power == PM_BINDING_POWER_STATEMENT) && arguments.arguments == NULL && - arguments.opening_loc.start == NULL && + arguments.opening_loc.length == 0 && match1(parser, PM_TOKEN_COMMA) ) { - return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } else { - return (pm_node_t *) call; + return UP(call); } } case PM_TOKEN_DOT_DOT: @@ -21795,40 +21660,40 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_node_t *right = NULL; if (token_begins_expression_p(parser->current.type)) { - right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); } - return (pm_node_t *) pm_range_node_create(parser, node, &token, right); + return UP(pm_range_node_create(parser, node, &token, right)); } case PM_TOKEN_KEYWORD_IF_MODIFIER: { pm_token_t keyword = parser->current; parser_lex(parser); - pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate); + pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); + return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate)); } case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: { pm_token_t keyword = parser->current; parser_lex(parser); - pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate); + pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); + return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate)); } case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: { parser_lex(parser); pm_statements_node_t *statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, statements, node, true); - pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0); + pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1)); + return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0)); } case PM_TOKEN_KEYWORD_WHILE_MODIFIER: { parser_lex(parser); pm_statements_node_t *statements = pm_statements_node_create(parser); pm_statements_node_body_append(parser, statements, node, true); - pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0); + pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1)); + return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0)); } case PM_TOKEN_QUESTION_MARK: { context_push(parser, PM_CONTEXT_TERNARY); @@ -21838,7 +21703,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_token_t qmark = parser->current; parser_lex(parser); - pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1)); + pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1)); if (parser->recovering) { // If parsing the true expression of this ternary resulted in a syntax @@ -21847,27 +21712,23 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // before the `expect` function call to make sure it doesn't // accidentally move past a ':' token that occurs after the syntax // error. - pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; - pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end); + pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon))); context_pop(parser); pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression); + return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression)); } accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON); pm_token_t colon = parser->previous; - pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1)); + pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1)); context_pop(parser); pop_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); - - return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression); + return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression)); } case PM_TOKEN_COLON_COLON: { parser_lex(parser); @@ -21880,7 +21741,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t if ( (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) || - (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) + ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ) { // If we have a constant immediately following a '::' operator, then // this can either be a constant path or a method call, depending on @@ -21891,11 +21752,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_token_t message = parser->previous; pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); - path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments); + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); + path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments)); } else { // Otherwise, this is a constant path. That would look like Foo::Bar. - path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous); + path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous)); } // If this is followed by a comma then it is a multiple assignment. @@ -21915,15 +21776,15 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t // If we have an identifier following a '::' operator, then it is for // sure a method call. pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1)); + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments); // If this is followed by a comma then it is a multiple assignment. if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { - return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } - return (pm_node_t *) call; + return UP(call); } case PM_TOKEN_PARENTHESIS_LEFT: { // If we have a parenthesis following a '::' operator, then it is the @@ -21931,11 +21792,11 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_arguments_t arguments = { 0 }; parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1)); - return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments); + return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments)); } default: { expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); - return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous); + return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous)); } } } @@ -21944,31 +21805,31 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t parser_lex(parser); accept1(parser, PM_TOKEN_NEWLINE); - pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); + pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); context_pop(parser); - return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value); + return UP(pm_rescue_modifier_node_create(parser, node, &token, value)); } case PM_TOKEN_BRACKET_LEFT: { parser_lex(parser); pm_arguments_t arguments = { 0 }; - arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.opening_loc = TOK2LOC(parser, &parser->previous); if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { pm_accepts_block_stack_push(parser, true); - parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1)); + parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1)); pm_accepts_block_stack_pop(parser); expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET); } - arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous); + arguments.closing_loc = TOK2LOC(parser, &parser->previous); // If we have a comma after the closing bracket then this is a multiple // assignment and we should parse the targets. if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments); - return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } // If we're at the end of the arguments, we can now check if there is a @@ -21984,17 +21845,17 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t if (block != NULL) { if (arguments.block != NULL) { - pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK); + pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK); if (arguments.arguments == NULL) { arguments.arguments = pm_arguments_node_create(parser); } - pm_arguments_node_arguments_append(arguments.arguments, arguments.block); + pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block); } - arguments.block = (pm_node_t *) block; + arguments.block = UP(block); } - return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments); + return UP(pm_call_node_aref_create(parser, node, &arguments)); } case PM_TOKEN_KEYWORD_IN: { bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; @@ -22009,9 +21870,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1)); parser->pattern_matching_newlines = previous_pattern_matching_newlines; - pm_constant_id_list_free(&captures); - return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator); + return UP(pm_match_predicate_node_create(parser, node, pattern, &operator)); } case PM_TOKEN_EQUAL_GREATER: { bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; @@ -22026,9 +21886,8 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1)); parser->pattern_matching_newlines = previous_pattern_matching_newlines; - pm_constant_id_list_free(&captures); - return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator); + return UP(pm_match_required_node_create(parser, node, pattern, &operator)); } default: assert(false && "unreachable"); @@ -22041,16 +21900,83 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t #undef PM_PARSE_PATTERN_MULTI /** - * Determine if a given call node looks like a "command", which means it has - * arguments but does not have parentheses. + * Some nodes act as statements and limit which operators can follow. This + * function inspects the node and the upcoming token to determine whether the + * expression loop should stop. It is called both after prefix parsing and after + * each infix operator. + * + * As a side effect, this function also attaches do-blocks to command-style call + * nodes when appropriate. + * + * Returns true if the expression loop should stop (i.e., the next operator + * should not be consumed). */ -static inline bool -pm_call_node_command_p(const pm_call_node_t *node) { - return ( - (node->opening_loc.start == NULL) && - (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) && - (node->arguments != NULL || node->block != NULL) - ); +static bool +parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) { + pm_binding_power_t left = pm_binding_powers[parser->current.type].left; + + switch (PM_NODE_TYPE(node)) { + case PM_MULTI_WRITE_NODE: + case PM_RETURN_NODE: + case PM_BREAK_NODE: + case PM_NEXT_NODE: + return left > PM_BINDING_POWER_MODIFIER; + case PM_CLASS_VARIABLE_WRITE_NODE: + case PM_CONSTANT_PATH_WRITE_NODE: + case PM_CONSTANT_WRITE_NODE: + case PM_GLOBAL_VARIABLE_WRITE_NODE: + case PM_INSTANCE_VARIABLE_WRITE_NODE: + case PM_LOCAL_VARIABLE_WRITE_NODE: + return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER; + case PM_CALL_NODE: { + // Calls with an implicit array on the right-hand side are + // statements and can only be followed by modifiers. + if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) { + return left > PM_BINDING_POWER_MODIFIER; + } + + // Command-style calls (including block commands like + // `foo bar do end`) can only be followed by composition + // (and/or) and modifier (if/unless/etc.) operators. + if (pm_command_call_value_p(node)) { + return left > PM_BINDING_POWER_COMPOSITION; + } + + // A block call (command with do-block, or any call chained + // from one) can only be followed by call chaining (., ::, + // &.), composition (and/or), and modifier operators. + if (pm_block_call_p(node)) { + return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL; + } + + return false; + } + case PM_SUPER_NODE: + case PM_YIELD_NODE: + // Command-style super/yield (without parens) can only be followed + // by composition and modifier operators. + if (pm_command_call_value_p(node)) { + return left > PM_BINDING_POWER_COMPOSITION; + } + return false; + case PM_DEF_NODE: + // An endless method whose body is a command-style call (e.g., + // `def f = foo bar`) is a command assignment and can only be + // followed by modifiers. + return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node); + case PM_RESCUE_MODIFIER_NODE: + // A rescue modifier whose handler is a pattern match (=> or in) + // produces a statement and cannot be followed by operators above + // the modifier level. + if (left > PM_BINDING_POWER_MODIFIER) { + pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node; + pm_node_t *rescue_expression = cast->rescue_expression; + return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE); + } + return false; + default: + return false; + } } /** @@ -22062,46 +21988,40 @@ pm_call_node_command_p(const pm_call_node_t *node) { * determine if they need to perform additional cleanup. */ static pm_node_t * -parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) { +parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) { pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP); - return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current))); } - pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth); + pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth); + // Some prefix nodes are statements and can only be followed by modifiers + // (if/unless/while/until/rescue) or nothing at all. We check these cheaply + // here before entering the infix loop. switch (PM_NODE_TYPE(node)) { - case PM_MISSING_NODE: - // If we found a syntax error, then the type of node returned by - // parse_expression_prefix is going to be a missing node. + case PM_ERROR_RECOVERY_NODE: return node; case PM_PRE_EXECUTION_NODE: + return node; case PM_POST_EXECUTION_NODE: case PM_ALIAS_GLOBAL_VARIABLE_NODE: case PM_ALIAS_METHOD_NODE: - case PM_MULTI_WRITE_NODE: case PM_UNDEF_NODE: - // These expressions are statements, and cannot be followed by - // operators (except modifiers). if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) { return node; } break; case PM_CALL_NODE: - // If we have a call node, then we need to check if it looks like a - // method call without parentheses that contains arguments. If it - // does, then it has different rules for parsing infix operators, - // namely that it only accepts composition (and/or) and modifiers - // (if/unless/etc.). - if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) { + case PM_SUPER_NODE: + case PM_YIELD_NODE: + case PM_DEF_NODE: + if (parse_expression_terminator(parser, node)) { return node; } break; case PM_SYMBOL_NODE: - // If we have a symbol node that is being parsed as a label, then we - // need to immediately return, because there should never be an - // infix operator following this node. - if (pm_symbol_node_label_p(node)) { + if (pm_symbol_node_label_p(parser, node)) { return node; } break; @@ -22109,8 +22029,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc break; } - // Otherwise we'll look and see if the next token can be parsed as an infix - // operator. If it can, then we'll parse it using parse_expression_infix. + // Look and see if the next token can be parsed as an infix operator. If it + // can, then we'll parse it using parse_expression_infix. pm_binding_powers_t current_binding_powers; pm_token_type_t current_token_type; @@ -22120,39 +22040,8 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc binding_power <= current_binding_powers.left && current_binding_powers.binary ) { - node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1)); - - switch (PM_NODE_TYPE(node)) { - case PM_MULTI_WRITE_NODE: - // Multi-write nodes are statements, and cannot be followed by - // operators except modifiers. - if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) { - return node; - } - break; - case PM_CLASS_VARIABLE_WRITE_NODE: - case PM_CONSTANT_PATH_WRITE_NODE: - case PM_CONSTANT_WRITE_NODE: - case PM_GLOBAL_VARIABLE_WRITE_NODE: - case PM_INSTANCE_VARIABLE_WRITE_NODE: - case PM_LOCAL_VARIABLE_WRITE_NODE: - // These expressions are statements, by virtue of the right-hand - // side of their write being an implicit array. - if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) { - return node; - } - break; - case PM_CALL_NODE: - // These expressions are also statements, by virtue of the - // right-hand side of the expression (i.e., the last argument to - // the call node) being an implicit array. - if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) { - return node; - } - break; - default: - break; - } + node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1)); + if (parse_expression_terminator(parser, node)) return node; // If the operator is nonassoc and we should not be able to parse the // upcoming infix operator, break. @@ -22160,7 +22049,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // If this is a non-assoc operator and we are about to parse the // exact same operator, then we need to add an error. if (match1(parser, current_token_type)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type)); break; } @@ -22173,7 +22062,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc // if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) { if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type)); + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type)); break; } @@ -22185,7 +22074,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc } } - if (accepts_command_call) { + if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) { // A command-style method call is only accepted on method chains. // Thus, we check whether the parsed node can continue method chains. // The method chain can continue if the parsed node is one of the following five kinds: @@ -22200,29 +22089,29 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc if ( // (1) foo[1] !( - cast->call_operator_loc.start == NULL && - cast->message_loc.start != NULL && - cast->message_loc.start[0] == '[' && - cast->message_loc.end[-1] == ']' + cast->call_operator_loc.length == 0 && + cast->message_loc.length > 0 && + parser->start[cast->message_loc.start] == '[' && + parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']' ) && // (2) foo.bar !( - cast->call_operator_loc.start != NULL && + cast->call_operator_loc.length > 0 && cast->arguments == NULL && cast->block == NULL && - cast->opening_loc.start == NULL + cast->opening_loc.length == 0 ) && // (3) foo.bar(1) !( - cast->call_operator_loc.start != NULL && - cast->opening_loc.start != NULL + cast->call_operator_loc.length > 0 && + cast->opening_loc.length > 0 ) && // (4) foo.bar do end !( cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE) ) ) { - accepts_command_call = false; + flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL; } break; } @@ -22230,10 +22119,21 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc case PM_CONSTANT_PATH_NODE: break; default: - accepts_command_call = false; + flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL; break; } } + + if (context_terminator(parser->current_context->context, &parser->current)) { + pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type]; + if ( + !next_binding_powers.binary || + binding_power > next_binding_powers.left || + (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node)) + ) { + return node; + } + } } return node; @@ -22252,15 +22152,16 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) { pm_arguments_node_t *arguments = pm_arguments_node_create(parser); pm_arguments_node_arguments_append( + parser->arena, arguments, - (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)) + UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))) ); - pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create( + pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create( parser, arguments, pm_parser_constant_id_constant(parser, "print", 5) - ), true); + )), true); } if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) { @@ -22271,47 +22172,49 @@ wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) { pm_arguments_node_t *arguments = pm_arguments_node_create(parser); pm_arguments_node_arguments_append( + parser->arena, arguments, - (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)) + UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))) ); pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)); - pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments); + pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments); pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create( parser, pm_parser_constant_id_constant(parser, "$F", 2), - (pm_node_t *) call + UP(call) ); - pm_statements_node_body_prepend(statements, (pm_node_t *) write); + pm_statements_node_body_prepend(parser->arena, statements, UP(write)); } pm_arguments_node_t *arguments = pm_arguments_node_create(parser); pm_arguments_node_arguments_append( + parser->arena, arguments, - (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)) + UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))) ); if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) { pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser); - pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create( + pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create( parser, - (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"), - &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }, - (pm_node_t *) pm_true_node_synthesized_create(parser) - )); + UP(pm_symbol_node_synthesized_create(parser, "chomp")), + NULL, + UP(pm_true_node_synthesized_create(parser)) + ))); - pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords); - pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS); + pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords)); + pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS); } pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser); - pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create( + pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create( parser, - (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)), + UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))), statements - ), true); + )), true); statements = wrapped_statements; } @@ -22355,7 +22258,6 @@ parse_program(pm_parser_t *parser) { statements = wrap_statements(parser, statements); } else { flush_block_exits(parser, previous_block_exits); - pm_node_list_free(¤t_block_exits); } // If this is an empty file, then we're still going to parse all of the @@ -22363,10 +22265,10 @@ parse_program(pm_parser_t *parser) { // correct the location information. if (statements == NULL) { statements = pm_statements_node_create(parser); - pm_statements_node_location_set(statements, parser->start, parser->start); + statements->base.location = (pm_location_t) { 0 }; } - return (pm_node_t *) pm_program_node_create(parser, &locals, statements); + return UP(pm_program_node_create(parser, &locals, statements)); } /******************************************************************************/ @@ -22375,8 +22277,8 @@ parse_program(pm_parser_t *parser) { /** * A vendored version of strnstr that is used to find a substring within a - * string with a given length. This function is used to search for the Ruby - * engine name within a shebang when the -x option is passed to Ruby. + * string with a given length. This function is used to search for "ruby" + * within a shebang when the -x option is passed to Ruby. * * The only modification that we made here is that we don't do NULL byte checks * because we know the little parameter will not have a NULL byte and we allow @@ -22386,7 +22288,7 @@ static const char * pm_strnstr(const char *big, const char *little, size_t big_length) { size_t little_length = strlen(little); - for (const char *big_end = big + big_length; big < big_end; big++) { + for (const char *max = big + big_length - little_length; big <= max; big++) { if (*big == *little && memcmp(big, little, little_length) == 0) return big; } @@ -22404,7 +22306,7 @@ pm_strnstr(const char *big, const char *little, size_t big_length) { static void pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) { if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') { - pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN); + pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN); } } #endif @@ -22439,11 +22341,14 @@ pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const c /** * Initialize a parser with the given start and end pointers. */ -PRISM_EXPORTED_FUNCTION void -pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) { +void +pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) { + assert(arena != NULL); assert(source != NULL); *parser = (pm_parser_t) { + .arena = arena, + .metadata_arena = { 0 }, .node_id = 0, .lex_state = PM_LEX_STATE_BEG, .enclosure_nesting = 0, @@ -22462,7 +22367,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .current = { .type = PM_TOKEN_EOF, .start = source, .end = source }, .next_start = NULL, .heredoc_end = NULL, - .data_loc = { .start = NULL, .end = NULL }, + .data_loc = { 0 }, .comment_list = { 0 }, .magic_comment_list = { 0 }, .warning_list = { 0 }, @@ -22472,11 +22377,11 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .encoding = PM_ENCODING_UTF_8_ENTRY, .encoding_changed_callback = NULL, .encoding_comment_start = source, - .lex_callback = NULL, + .lex_callback = { 0 }, .filepath = { 0 }, .constant_pool = { 0 }, - .newline_list = { 0 }, - .integer_base = 0, + .line_offsets = { 0 }, + .integer = { 0 }, .current_string = PM_STRING_EMPTY, .start_line = 1, .explicit_encoding = NULL, @@ -22485,6 +22390,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .partial_script = false, .command_start = true, .recovering = false, + .continuable = true, .encoding_locked = false, .encoding_changed = false, .pattern_matching_newlines = false, @@ -22492,32 +22398,30 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .current_block_exits = NULL, .semantic_token_seen = false, .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET, - .current_regular_expression_ascii_only = false, .warn_mismatched_indentation = true }; - // Initialize the constant pool. We're going to completely guess as to the - // number of constants that we'll need based on the size of the input. The - // ratio we chose here is actually less arbitrary than you might think. - // - // We took ~50K Ruby files and measured the size of the file versus the - // number of constants that were found in those files. Then we found the - // average and standard deviation of the ratios of constants/bytesize. Then - // we added 1.34 standard deviations to the average to get a ratio that - // would fit 75% of the files (for a two-tailed distribution). This works - // because there was about a 0.77 correlation and the distribution was - // roughly normal. - // - // This ratio will need to change if we add more constants to the constant - // pool for another node type. - uint32_t constant_size = ((uint32_t) size) / 95; - pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size); - - // Initialize the newline list. Similar to the constant pool, we're going to - // guess at the number of newlines that we'll need based on the size of the - // input. + /* Pre-size the arenas based on input size to reduce the number of block + * allocations (and the kernel page zeroing they trigger). The ratios were + * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input. + * The reserve call is a no-op when the capacity is at or below the default + * arena block size, so small inputs don't waste an extra allocation. */ + if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4); + if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4); + + /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the + * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135. + * We use 120 as a balance between over-allocation waste and resize + * frequency. Resizes are cheap with arena allocation, so we lean toward + * under-estimating. */ + uint32_t constant_size = ((uint32_t) size) / 120; + pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size); + + /* Initialize the line offset list. Similar to the constant pool, we are + * going to estimate the number of newlines that we will need based on the + * size of the input. */ size_t newline_size = size / 22; - pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size); + pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size); // If options were provided to this parse, establish them here. if (options != NULL) { @@ -22554,7 +22458,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm if (parser->parsing_eval) parser->warn_mismatched_indentation = false; for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { - const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index); + const pm_options_scope_t *scope = pm_options_scope(options, scope_index); pm_parser_scope_push(parser, scope_index == 0); // Scopes given from the outside are not allowed to have numbered @@ -22562,20 +22466,24 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED; for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { - const pm_string_t *local = pm_options_scope_local_get(scope, local_index); + const pm_string_t *local = pm_options_scope_local(scope, local_index); const uint8_t *source = pm_string_source(local); size_t length = pm_string_length(local); - void *allocated = xmalloc(length); - if (allocated == NULL) continue; - + uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1); memcpy(allocated, source, length); - pm_parser_local_add_owned(parser, (uint8_t *) allocated, length); + pm_parser_local_add_owned(parser, allocated, length); } } } + // Now that we have established the user-provided options, check if + // a version was given and parse as the latest version otherwise. + if (parser->version == PM_OPTIONS_VERSION_UNSET) { + parser->version = PM_OPTIONS_VERSION_LATEST; + } + pm_accepts_block_stack_push(parser, true); // Skip past the UTF-8 BOM if it exists. @@ -22609,8 +22517,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // If the shebang does not include "ruby" and this is the main script being // parsed, then we will start searching the file for a shebang that does // contain "ruby" as if -x were passed on the command line. - const uint8_t *newline = next_newline(parser->start, parser->end - parser->start); - size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start); + const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end); + size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end); if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') { const char *engine; @@ -22629,7 +22537,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm } search_shebang = false; - } else if (options->main_script && !parser->parsing_eval) { + } else if (options != NULL && options->main_script && !parser->parsing_eval) { search_shebang = true; } } @@ -22650,7 +22558,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm const uint8_t *newline = next_newline(cursor, parser->end - cursor); while (newline != NULL) { - pm_newline_list_append(&parser->newline_list, newline); + pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1)); cursor = newline + 1; newline = next_newline(cursor, parser->end - cursor); @@ -22679,8 +22587,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor }; parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor }; } else { - pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND); - pm_newline_list_clear(&parser->newline_list); + pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND); + pm_line_offset_list_clear(&parser->line_offsets); } } @@ -22691,56 +22599,28 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm } /** - * Register a callback that will be called whenever prism changes the encoding - * it is using to parse based on the magic comment. - */ -PRISM_EXPORTED_FUNCTION void -pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) { - parser->encoding_changed_callback = callback; -} - -/** - * Free all of the memory associated with the comment list. - */ -static inline void -pm_comment_list_free(pm_list_t *list) { - pm_list_node_t *node, *next; - - for (node = list->head; node != NULL; node = next) { - next = node->next; - - pm_comment_t *comment = (pm_comment_t *) node; - xfree(comment); - } -} - -/** - * Free all of the memory associated with the magic comment list. + * Allocate and initialize a parser with the given start and end pointers. + * + * The resulting parser must eventually be freed with `pm_parser_free()`. The + * arena is caller-owned and must outlive the parser — `pm_parser_cleanup()` + * does not free the arena. */ -static inline void -pm_magic_comment_list_free(pm_list_t *list) { - pm_list_node_t *node, *next; - - for (node = list->head; node != NULL; node = next) { - next = node->next; +pm_parser_t * +pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) { + pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t)); + if (parser == NULL) abort(); - pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node; - xfree(magic_comment); - } + pm_parser_init(arena, parser, source, size, options); + return parser; } /** * Free any memory associated with the given parser. */ -PRISM_EXPORTED_FUNCTION void -pm_parser_free(pm_parser_t *parser) { - pm_string_free(&parser->filepath); - pm_diagnostic_list_free(&parser->error_list); - pm_diagnostic_list_free(&parser->warning_list); - pm_comment_list_free(&parser->comment_list); - pm_magic_comment_list_free(&parser->magic_comment_list); - pm_constant_pool_free(&parser->constant_pool); - pm_newline_list_free(&parser->newline_list); +void +pm_parser_cleanup(pm_parser_t *parser) { + pm_string_cleanup(&parser->filepath); + pm_arena_cleanup(&parser->metadata_arena); while (parser->current_scope != NULL) { // Normally, popping the scope doesn't free the locals since it is @@ -22756,145 +22636,224 @@ pm_parser_free(pm_parser_t *parser) { } /** - * Parse the Ruby source associated with the given parser and return the tree. + * Free both the memory held by the given parser and the parser itself. */ -PRISM_EXPORTED_FUNCTION pm_node_t * -pm_parse(pm_parser_t *parser) { - return parse_program(parser); +void +pm_parser_free(pm_parser_t *parser) { + pm_parser_cleanup(parser); + xfree_sized(parser, sizeof(pm_parser_t)); } /** - * Read into the stream until the gets callback returns false. If the last read - * line from the stream matches an __END__ marker, then halt and return false, - * otherwise return true. + * Returns true if the given diagnostic ID represents an error that cannot be + * fixed by appending more input. These are errors where the existing source + * contains definitively invalid syntax (as opposed to merely incomplete input). */ static bool -pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets) { -#define LINE_SIZE 4096 - char line[LINE_SIZE]; - - while (memset(line, '\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) { - size_t length = LINE_SIZE; - while (length > 0 && line[length - 1] == '\n') length--; - - if (length == LINE_SIZE) { - // If we read a line that is the maximum size and it doesn't end - // with a newline, then we'll just append it to the buffer and - // continue reading. - length--; - pm_buffer_append_string(buffer, line, length); - continue; - } - - // Append the line to the buffer. - length--; - pm_buffer_append_string(buffer, line, length); - - // Check if the line matches the __END__ marker. If it does, then stop - // reading and return false. In most circumstances, this means we should - // stop reading from the stream so that the DATA constant can pick it - // up. - switch (length) { - case 7: - if (strncmp(line, "__END__", 7) == 0) return false; - break; - case 8: - if (strncmp(line, "__END__\n", 8) == 0) return false; - break; - case 9: - if (strncmp(line, "__END__\r\n", 9) == 0) return false; - break; - } +pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) { + switch (diag_id) { + case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR: + case PM_ERR_BEGIN_UPCASE_BRACE: + case PM_ERR_CLASS_VARIABLE_BARE: + case PM_ERR_END_UPCASE_BRACE: + case PM_ERR_ESCAPE_INVALID_HEXADECIMAL: + case PM_ERR_ESCAPE_INVALID_UNICODE_LIST: + case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT: + case PM_ERR_EXPRESSION_NOT_WRITABLE: + case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF: + case PM_ERR_FLOAT_PARSE: + case PM_ERR_GLOBAL_VARIABLE_BARE: + case PM_ERR_HASH_KEY: + case PM_ERR_HEREDOC_IDENTIFIER: + case PM_ERR_INSTANCE_VARIABLE_BARE: + case PM_ERR_INVALID_BLOCK_EXIT: + case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT: + case PM_ERR_INVALID_FLOAT_EXPONENT: + case PM_ERR_INVALID_NUMBER_BINARY: + case PM_ERR_INVALID_NUMBER_DECIMAL: + case PM_ERR_INVALID_NUMBER_HEXADECIMAL: + case PM_ERR_INVALID_NUMBER_OCTAL: + case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING: + case PM_ERR_NO_LOCAL_VARIABLE: + case PM_ERR_PARAMETER_ORDER: + case PM_ERR_STATEMENT_UNDEF: + case PM_ERR_VOID_EXPRESSION: + return true; + default: + return false; } - - return true; -#undef LINE_SIZE } /** - * Determine if there was an unterminated heredoc at the end of the input, which - * would mean the stream isn't finished and we should keep reading. + * Determine whether the source parsed by the given parser could become valid if + * more input were appended. This is used by tools like IRB to decide whether to + * prompt for continuation or to display an error. + * + * The parser starts with continuable=true. This function scans all errors to + * detect two categories of non-continuable errors: + * + * 1. Fatal errors: errors like invalid number literals or bare global variables + * that indicate definitively invalid syntax. These are only considered fatal + * if they occur before EOF (at EOF they could be from truncated input, e.g. + * `"\x` is an incomplete hex escape). * - * For the other lex modes we can check if the lex mode has been closed, but for - * heredocs when we hit EOF we close the lex mode and then go back to parse the - * rest of the line after the heredoc declaration so that we get more of the - * syntax tree. + * 2. Stray tokens: unexpected_token_ignore and unexpected_token_close_context + * errors indicate tokens that don't belong. A stray token is a cascade + * effect (and does not prevent continuability) if: + * + * a. A non-stray, non-fatal error appeared earlier in the error list at a + * strictly earlier source position (the stray was caused by a preceding + * parse failure, e.g. a truncated heredoc), OR + * b. The stray token is at EOF, starts after position 0 (there is valid + * code before it), and either is a single byte (likely a truncated + * token like `\`) or there are non-stray errors elsewhere. + * + * Closing delimiters (`)`, `]`, `}`) at EOF are always genuinely stray — + * they are complete tokens and cannot become part of a longer valid + * construct by appending more input. + * + * c. The stray token is `=` at the start of a line, which could be the + * beginning of `=begin` (an embedded document). The remaining bytes + * after `=` may parse as an identifier, so the error is not at EOF, + * but the construct is genuinely incomplete. */ -static bool -pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) { - pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head; +static void +pm_parse_continuable(pm_parser_t *parser) { + // If there are no errors then there is nothing to continue. + if (parser->error_list.size == 0) { + parser->continuable = false; + return; + } - for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) { - if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) { - return true; + if (!parser->continuable) return; + + size_t source_length = (size_t) (parser->end - parser->start); + + // First pass: check if there are any non-stray, non-fatal errors. + bool has_non_stray_error = false; + for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { + if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) { + has_non_stray_error = true; + break; } } - return false; -} + // Second pass: check each error. We track the minimum source position + // among non-stray, non-fatal errors seen so far in list order, which + // lets us detect cascade stray tokens. + size_t non_stray_min_start = SIZE_MAX; -/** - * Parse a stream of Ruby source and return the tree. - * - * Prism is designed around having the entire source in memory at once, but you - * can stream stdin in to Ruby so we need to support a streaming API. - */ -PRISM_EXPORTED_FUNCTION pm_node_t * -pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const pm_options_t *options) { - pm_buffer_init(buffer); + for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { + size_t error_start = (size_t) error->location.start; + size_t error_end = error_start + (size_t) error->location.length; + bool at_eof = error_end >= source_length; - bool eof = pm_parse_stream_read(buffer, stream, stream_fgets); - pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); - pm_node_t *node = pm_parse(parser); + // Fatal errors are non-continuable unless they occur at EOF. + if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) { + parser->continuable = false; + return; + } - while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) { - pm_node_destroy(parser, node); - eof = pm_parse_stream_read(buffer, stream, stream_fgets); + // Track non-stray, non-fatal error positions in list order. + if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && + error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) { + if (error_start < non_stray_min_start) non_stray_min_start = error_start; + continue; + } - pm_parser_free(parser); - pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); - node = pm_parse(parser); + // This is a stray token. Determine if it is a cascade effect + // of a preceding error or genuinely stray. + + // Rule (a): a non-stray error was seen earlier in the list at a + // strictly earlier position — this stray is a cascade effect. + if (non_stray_min_start < error_start) continue; + + // Rule (b): this stray is at EOF with valid code before it. + // Single-byte stray tokens at EOF (like `\` for line continuation) + // are likely truncated tokens. Multi-byte stray tokens (like the + // keyword `end`) need additional evidence that they are cascade + // effects (i.e. non-stray errors exist elsewhere). + if (at_eof && error_start > 0) { + // Exception: closing delimiters at EOF are genuinely stray. + if (error->location.length == 1) { + const uint8_t *byte = parser->start + error_start; + if (*byte == ')' || *byte == ']' || *byte == '}') { + parser->continuable = false; + return; + } + + // Single-byte non-delimiter stray at EOF: cascade. + continue; + } + + // Multi-byte stray at EOF: cascade only if there are + // non-stray errors (evidence of a preceding parse failure). + if (has_non_stray_error) continue; + } + + // Rule (c): a stray `=` at the start of a line could be the + // beginning of an embedded document (`=begin`). The remaining + // bytes after `=` parse as an identifier, so the error is not + // at EOF, but the construct is genuinely incomplete. + if (error->location.length == 1) { + const uint8_t *byte = parser->start + error_start; + if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue; + } + + // This stray token is genuinely non-continuable. + parser->continuable = false; + return; } +} +/** + * Parse the Ruby source associated with the given parser and return the tree. + */ +pm_node_t * +pm_parse(pm_parser_t *parser) { + pm_node_t *node = parse_program(parser); + pm_parse_continuable(parser); return node; } /** - * Parse the source and return true if it parses without errors or warnings. + * Parse a stream of Ruby source and return the tree. + * + * Prism is designed around having the entire source in memory at once, but you + * can stream stdin in to Ruby so we need to support a streaming API. */ -PRISM_EXPORTED_FUNCTION bool -pm_parse_success_p(const uint8_t *source, size_t size, const char *data) { - pm_options_t options = { 0 }; - pm_options_read(&options, data); +pm_node_t * +pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) { + bool eof = pm_source_stream_read(source); - pm_parser_t parser; - pm_parser_init(&parser, source, size, &options); + pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options); + pm_node_t *node = pm_parse(tmp); - pm_node_t *node = pm_parse(&parser); - pm_node_destroy(&parser, node); + while (!eof && tmp->error_list.size > 0) { + eof = pm_source_stream_read(source); - bool result = parser.error_list.size == 0; - pm_parser_free(&parser); - pm_options_free(&options); + pm_parser_free(tmp); + pm_arena_cleanup(arena); - return result; + tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options); + node = pm_parse(tmp); + } + + *parser = tmp; + return node; } #undef PM_CASE_KEYWORD #undef PM_CASE_OPERATOR #undef PM_CASE_WRITABLE #undef PM_STRING_EMPTY -#undef PM_LOCATION_NODE_BASE_VALUE -#undef PM_LOCATION_NODE_VALUE -#undef PM_LOCATION_NULL_VALUE -#undef PM_LOCATION_TOKEN_VALUE // We optionally support serializing to a binary string. For systems that don't // want or need this functionality, it can be turned off with the // PRISM_EXCLUDE_SERIALIZATION define. #ifndef PRISM_EXCLUDE_SERIALIZATION -static inline void +static PRISM_INLINE void pm_serialize_header(pm_buffer_t *buffer) { pm_buffer_append_string(buffer, "PRISM", 5); pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR); @@ -22906,7 +22865,7 @@ pm_serialize_header(pm_buffer_t *buffer) { /** * Serialize the AST represented by the given node to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_header(buffer); pm_serialize_content(parser, node, buffer); @@ -22917,13 +22876,14 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { * Parse and serialize the AST represented by the given source to the given * buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); + pm_arena_t arena = { 0 }; pm_parser_t parser; - pm_parser_init(&parser, source, size, &options); + pm_parser_init(&arena, &parser, source, size, &options); pm_node_t *node = pm_parse(&parser); @@ -22931,216 +22891,53 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons pm_serialize_content(&parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); - pm_node_destroy(&parser, node); - pm_parser_free(&parser); - pm_options_free(&options); + pm_parser_cleanup(&parser); + pm_arena_cleanup(&arena); + pm_options_cleanup(&options); } /** * Parse and serialize the AST represented by the source that is read out of the * given stream into to the given buffer. */ -PRISM_EXPORTED_FUNCTION void -pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, const char *data) { - pm_parser_t parser; +void +pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) { + pm_arena_t arena = { 0 }; + pm_parser_t *parser; pm_options_t options = { 0 }; pm_options_read(&options, data); - pm_buffer_t parser_buffer; - pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, stream_fgets, &options); + pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options); pm_serialize_header(buffer); - pm_serialize_content(&parser, node, buffer); + pm_serialize_content(parser, node, buffer); pm_buffer_append_byte(buffer, '\0'); - pm_node_destroy(&parser, node); - pm_buffer_free(&parser_buffer); - pm_parser_free(&parser); - pm_options_free(&options); + pm_parser_free(parser); + pm_arena_cleanup(&arena); + pm_options_cleanup(&options); } /** * Parse and serialize the comments in the given source to the given buffer. */ -PRISM_EXPORTED_FUNCTION void +void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; pm_options_read(&options, data); + pm_arena_t arena = { 0 }; pm_parser_t parser; - pm_parser_init(&parser, source, size, &options); + pm_parser_init(&arena, &parser, source, size, &options); - pm_node_t *node = pm_parse(&parser); + pm_parse(&parser); pm_serialize_header(buffer); pm_serialize_encoding(parser.encoding, buffer); pm_buffer_append_varsint(buffer, parser.start_line); - pm_serialize_comment_list(&parser, &parser.comment_list, buffer); + pm_serialize_comment_list(&parser.comment_list, buffer); - pm_node_destroy(&parser, node); - pm_parser_free(&parser); - pm_options_free(&options); + pm_parser_cleanup(&parser); + pm_arena_cleanup(&arena); + pm_options_cleanup(&options); } #endif - -/******************************************************************************/ -/* Slice queries for the Ruby API */ -/******************************************************************************/ - -/** The category of slice returned from pm_slice_type. */ -typedef enum { - /** Returned when the given encoding name is invalid. */ - PM_SLICE_TYPE_ERROR = -1, - - /** Returned when no other types apply to the slice. */ - PM_SLICE_TYPE_NONE, - - /** Returned when the slice is a valid local variable name. */ - PM_SLICE_TYPE_LOCAL, - - /** Returned when the slice is a valid constant name. */ - PM_SLICE_TYPE_CONSTANT, - - /** Returned when the slice is a valid method name. */ - PM_SLICE_TYPE_METHOD_NAME -} pm_slice_type_t; - -/** - * Check that the slice is a valid local variable name or constant. - */ -pm_slice_type_t -pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) { - // first, get the right encoding object - const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name))); - if (encoding == NULL) return PM_SLICE_TYPE_ERROR; - - // check that there is at least one character - if (length == 0) return PM_SLICE_TYPE_NONE; - - size_t width; - if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) { - // valid because alphabetical - } else if (*source == '_') { - // valid because underscore - width = 1; - } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) { - // valid because multibyte - } else { - // invalid because no match - return PM_SLICE_TYPE_NONE; - } - - // determine the type of the slice based on the first character - const uint8_t *end = source + length; - pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL; - - // next, iterate through all of the bytes of the string to ensure that they - // are all valid identifier characters - source += width; - - while (source < end) { - if ((width = encoding->alnum_char(source, end - source)) != 0) { - // valid because alphanumeric - source += width; - } else if (*source == '_') { - // valid because underscore - source++; - } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) { - // valid because multibyte - source += width; - } else { - // invalid because no match - break; - } - } - - // accept a ! or ? at the end of the slice as a method name - if (*source == '!' || *source == '?' || *source == '=') { - source++; - result = PM_SLICE_TYPE_METHOD_NAME; - } - - // valid if we are at the end of the slice - return source == end ? result : PM_SLICE_TYPE_NONE; -} - -/** - * Check that the slice is a valid local variable name. - */ -PRISM_EXPORTED_FUNCTION pm_string_query_t -pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) { - switch (pm_slice_type(source, length, encoding_name)) { - case PM_SLICE_TYPE_ERROR: - return PM_STRING_QUERY_ERROR; - case PM_SLICE_TYPE_NONE: - case PM_SLICE_TYPE_CONSTANT: - case PM_SLICE_TYPE_METHOD_NAME: - return PM_STRING_QUERY_FALSE; - case PM_SLICE_TYPE_LOCAL: - return PM_STRING_QUERY_TRUE; - } - - assert(false && "unreachable"); - return PM_STRING_QUERY_FALSE; -} - -/** - * Check that the slice is a valid constant name. - */ -PRISM_EXPORTED_FUNCTION pm_string_query_t -pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) { - switch (pm_slice_type(source, length, encoding_name)) { - case PM_SLICE_TYPE_ERROR: - return PM_STRING_QUERY_ERROR; - case PM_SLICE_TYPE_NONE: - case PM_SLICE_TYPE_LOCAL: - case PM_SLICE_TYPE_METHOD_NAME: - return PM_STRING_QUERY_FALSE; - case PM_SLICE_TYPE_CONSTANT: - return PM_STRING_QUERY_TRUE; - } - - assert(false && "unreachable"); - return PM_STRING_QUERY_FALSE; -} - -/** - * Check that the slice is a valid method name. - */ -PRISM_EXPORTED_FUNCTION pm_string_query_t -pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) { -#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE) -#define C1(c) (*source == c) -#define C2(s) (memcmp(source, s, 2) == 0) -#define C3(s) (memcmp(source, s, 3) == 0) - - switch (pm_slice_type(source, length, encoding_name)) { - case PM_SLICE_TYPE_ERROR: - return PM_STRING_QUERY_ERROR; - case PM_SLICE_TYPE_NONE: - break; - case PM_SLICE_TYPE_LOCAL: - // numbered parameters are not valid method names - return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1])); - case PM_SLICE_TYPE_CONSTANT: - // all constants are valid method names - case PM_SLICE_TYPE_METHOD_NAME: - // all method names are valid method names - return PM_STRING_QUERY_TRUE; - } - - switch (length) { - case 1: - return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~')); - case 2: - return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**")); - case 3: - return B(C3("===") || C3("<=>") || C3("[]=")); - default: - return PM_STRING_QUERY_FALSE; - } - -#undef B -#undef C1 -#undef C2 -#undef C3 -} |
